Merge 4.14.86 into android-4.14-p

Changes in 4.14.86
	mm/huge_memory: rename freeze_page() to unmap_page()
	mm/huge_memory.c: reorder operations in __split_huge_page_tail()
	mm/huge_memory: splitting set mapping+index before unfreeze
	mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
	mm/khugepaged: collapse_shmem() stop if punched or truncated
	mm/khugepaged: fix crashes due to misaccounted holes
	mm/khugepaged: collapse_shmem() remember to clear holes
	mm/khugepaged: minor reorderings in collapse_shmem()
	mm/khugepaged: collapse_shmem() without freezing new_page
	mm/khugepaged: collapse_shmem() do not crash on Compound
	media: em28xx: Fix use-after-free when disconnecting
	ubi: Initialize Fastmap checkmapping correctly
	libceph: store ceph_auth_handshake pointer in ceph_connection
	libceph: factor out __prepare_write_connect()
	libceph: factor out __ceph_x_decrypt()
	libceph: factor out encrypt_authorizer()
	libceph: add authorizer challenge
	libceph: implement CEPHX_V2 calculation mode
	bpf: Prevent memory disambiguation attack
	tls: Add function to update the TLS socket configuration
	tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used.
	tls: Avoid copying crypto_info again after cipher_type check.
	tls: don't override sk_write_space if tls_set_sw_offload fails.
	tls: Use correct sk->sk_prot for IPV6
	net/tls: Fixed return value when tls_complete_pending_work() fails
	wil6210: missing length check in wmi_set_ie
	btrfs: validate type when reading a chunk
	btrfs: Verify that every chunk has corresponding block group at mount time
	btrfs: Refactor check_leaf function for later expansion
	btrfs: Check if item pointer overlaps with the item itself
	btrfs: Add sanity check for EXTENT_DATA when reading out leaf
	btrfs: Add checker for EXTENT_CSUM
	btrfs: Move leaf and node validation checker to tree-checker.c
	btrfs: tree-checker: Enhance btrfs_check_node output
	btrfs: tree-checker: Fix false panic for sanity test
	btrfs: tree-checker: Add checker for dir item
	btrfs: tree-checker: use %zu format string for size_t
	btrfs: tree-check: reduce stack consumption in check_dir_item
	btrfs: tree-checker: Verify block_group_item
	btrfs: tree-checker: Detect invalid and empty essential trees
	btrfs: Check that each block group has corresponding chunk at mount time
	btrfs: tree-checker: Check level for leaves and nodes
	btrfs: tree-checker: Fix misleading group system information
	f2fs: check blkaddr more accuratly before issue a bio
	f2fs: sanity check on sit entry
	f2fs: enhance sanity_check_raw_super() to avoid potential overflow
	f2fs: clean up with is_valid_blkaddr()
	f2fs: introduce and spread verify_blkaddr
	f2fs: fix to do sanity check with secs_per_zone
	f2fs: Add sanity_check_inode() function
	f2fs: fix to do sanity check with extra_attr feature
	f2fs: fix to do sanity check with user_block_count
	f2fs: fix to do sanity check with node footer and iblocks
	f2fs: fix to do sanity check with block address in main area
	f2fs: fix to do sanity check with i_extra_isize
	f2fs: fix to do sanity check with cp_pack_start_sum
	xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE
	Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()"
	net: skb_scrub_packet(): Scrub offload_fwd_mark
	net: thunderx: set xdp_prog to NULL if bpf_prog_add fails
	virtio-net: disable guest csum during XDP set
	virtio-net: fail XDP set if guest csum is negotiated
	net: thunderx: set tso_hdrs pointer to NULL in nicvf_free_snd_queue
	packet: copy user buffers before orphan or clone
	rapidio/rionet: do not free skb before reading its length
	s390/qeth: fix length check in SNMP processing
	usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2
	sched/core: Fix cpu.max vs. cpuhotplug deadlock
	x86/bugs: Add AMD's variant of SSB_NO
	x86/bugs: Add AMD's SPEC_CTRL MSR usage
	x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features
	x86/bugs: Update when to check for the LS_CFG SSBD mitigation
	x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR
	x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
	x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
	x86/speculation: Propagate information about RSB filling mitigation to sysfs
	x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
	x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support
	x86/retpoline: Remove minimal retpoline support
	x86/speculation: Update the TIF_SSBD comment
	x86/speculation: Clean up spectre_v2_parse_cmdline()
	x86/speculation: Remove unnecessary ret variable in cpu_show_common()
	x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common()
	x86/speculation: Disable STIBP when enhanced IBRS is in use
	x86/speculation: Rename SSBD update functions
	x86/speculation: Reorganize speculation control MSRs update
	sched/smt: Make sched_smt_present track topology
	x86/Kconfig: Select SCHED_SMT if SMP enabled
	sched/smt: Expose sched_smt_present static key
	x86/speculation: Rework SMT state change
	x86/l1tf: Show actual SMT state
	x86/speculation: Reorder the spec_v2 code
	x86/speculation: Mark string arrays const correctly
	x86/speculataion: Mark command line parser data __initdata
	x86/speculation: Unify conditional spectre v2 print functions
	x86/speculation: Add command line control for indirect branch speculation
	x86/speculation: Prepare for per task indirect branch speculation control
	x86/process: Consolidate and simplify switch_to_xtra() code
	x86/speculation: Avoid __switch_to_xtra() calls
	x86/speculation: Prepare for conditional IBPB in switch_mm()
	ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS
	x86/speculation: Split out TIF update
	x86/speculation: Prevent stale SPEC_CTRL msr content
	x86/speculation: Prepare arch_smt_update() for PRCTL mode
	x86/speculation: Add prctl() control for indirect branch speculation
	x86/speculation: Enable prctl mode for spectre_v2_user
	x86/speculation: Add seccomp Spectre v2 user space protection mode
	x86/speculation: Provide IBPB always command line options
	kvm: mmu: Fix race in emulated page table writes
	kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb
	KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall
	KVM: X86: Fix scan ioapic use-before-initialization
	xtensa: enable coprocessors that are being flushed
	xtensa: fix coprocessor context offset definitions
	xtensa: fix coprocessor part of ptrace_{get,set}xregs
	Btrfs: ensure path name is null terminated at btrfs_control_ioctl
	btrfs: relocation: set trans to be NULL after ending transaction
	PCI: layerscape: Fix wrong invocation of outbound window disable accessor
	arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou.
	x86/MCE/AMD: Fix the thresholding machinery initialization order
	x86/fpu: Disable bottom halves while loading FPU registers
	perf/x86/intel: Move branch tracing setup to the Intel-specific source file
	perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts()
	fs: fix lost error code in dio_complete
	ALSA: wss: Fix invalid snd_free_pages() at error path
	ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write
	ALSA: control: Fix race between adding and removing a user element
	ALSA: sparc: Fix invalid snd_free_pages() at error path
	ALSA: hda/realtek - Support ALC300
	ALSA: hda/realtek - fix headset mic detection for MSI MS-B171
	ext2: fix potential use after free
	ARM: dts: rockchip: Remove @0 from the veyron memory node
	dmaengine: at_hdmac: fix memory leak in at_dma_xlate()
	dmaengine: at_hdmac: fix module unloading
	btrfs: release metadata before running delayed refs
	staging: vchiq_arm: fix compat VCHIQ_IOC_AWAIT_COMPLETION
	staging: rtl8723bs: Add missing return for cfg80211_rtw_get_station
	USB: usb-storage: Add new IDs to ums-realtek
	usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series
	Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid"
	iio:st_magn: Fix enable device after trigger
	lib/test_kmod.c: fix rmmod double free
	mm: use swp_offset as key in shmem_replace_page()
	Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl()
	misc: mic/scif: fix copy-paste error in scif_create_remote_lookup
	binder: fix race that allows malicious free of live buffer
	libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
	libceph: check authorizer reply/challenge length before reading
	f2fs: fix missing up_read
	Linux 4.14.86

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
119 changed files with 2915 additions and 931 deletions


@@ -3997,9 +3997,13 @@
spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
user space attacks.
on - unconditionally enable
off - unconditionally disable
on - unconditionally enable, implies
spectre_v2_user=on
off - unconditionally disable, implies
spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable
@@ -4009,6 +4013,12 @@
CONFIG_RETPOLINE configuration option, and the
compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
Selecting 'off' will disable both the kernel and
the user space protections.
Specific mitigations can also be selected manually:
retpoline - replace indirect branches
@@ -4018,6 +4028,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.
spectre_v2_user=
[X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability between
user space tasks
on - Unconditionally enable mitigations. Is
enforced by spectre_v2=on
off - Unconditionally disable mitigations. Is
enforced by spectre_v2=off
prctl - Indirect branch speculation is enabled,
but mitigation can be enabled via prctl
per thread. The mitigation control state
is inherited on fork.
prctl,ibpb
- Like "prctl" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different user
space processes.
seccomp
- Same as "prctl" above, but all seccomp
threads will enable the mitigation unless
they explicitly opt out.
seccomp,ibpb
- Like "seccomp" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different
user space processes.
auto - Kernel selects the mitigation depending on
the available CPU features and vulnerability.
Default mitigation:
If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
Not specifying this option is equivalent to
spectre_v2_user=auto.
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)


@@ -92,3 +92,12 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
- PR_SPEC_INDIRECT_BRANCH: Indirect Branch Speculation in User Processes
(Mitigate Spectre V2 style attacks against user processes)
Invocations:
* prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
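
[Usage note, illustrative only and not part of the backported patch: a minimal user-space sketch of the PR_SPEC_INDIRECT_BRANCH interface documented above. The PR_* values match the uapi <linux/prctl.h> from this series; the fallback defines are only needed if the build host's headers predate it.]

/* Sketch: query, then opt this task into the indirect branch speculation
 * mitigation (STIBP) via the new PR_SPEC_INDIRECT_BRANCH prctl.
 */
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/prctl.h>

#ifndef PR_SET_SPECULATION_CTRL		/* fallbacks for older uapi headers */
# define PR_GET_SPECULATION_CTRL	52
# define PR_SET_SPECULATION_CTRL	53
#endif
#ifndef PR_SPEC_INDIRECT_BRANCH
# define PR_SPEC_INDIRECT_BRANCH	1
#endif
#ifndef PR_SPEC_DISABLE
# define PR_SPEC_DISABLE		(1UL << 2)
#endif

int main(void)
{
	int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

	if (state < 0) {
		fprintf(stderr, "PR_GET_SPECULATION_CTRL: %s\n", strerror(errno));
		return 1;
	}
	printf("indirect branch speculation state: 0x%x\n", state);

	/* Request STIBP for this task; only effective in prctl/seccomp/auto mode */
	if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
		  PR_SPEC_DISABLE, 0, 0) < 0) {
		fprintf(stderr, "PR_SET_SPECULATION_CTRL: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}

[The PR_SPEC_DISABLE request only changes anything when the kernel was booted with spectre_v2_user= in prctl, seccomp or auto mode, per the kernel-parameters entry earlier in this diff; with spectre_v2_user=off it fails with -EPERM and with =on it is a no-op, matching the ib_prctl_set() logic added to bugs.c further down.]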


@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
SUBLEVEL = 85
SUBLEVEL = 86
EXTRAVERSION =
NAME = Petit Gorille


@@ -47,7 +47,11 @@
#include "rk3288.dtsi"
/ {
memory@0 {
/*
* The default coreboot on veyron devices ignores memory@0 nodes
* and would instead create another memory node.
*/
memory {
device_type = "memory";
reg = <0x0 0x0 0x0 0x80000000>;
};


@@ -130,7 +130,7 @@
};
&pcie0 {
ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;


@@ -440,10 +440,6 @@ config RETPOLINE
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
Without compiler support, at least indirect branches in assembler
code are eliminated. Since this includes the syscall entry path,
it is not entirely pointless.
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
@@ -959,13 +955,7 @@ config NR_CPUS
approximately eight kilobytes to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
---help---
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
cost of slightly increased overhead in some places. If unsure say
N here.
def_bool y if SMP
config SCHED_MC
def_bool y


@@ -243,9 +243,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
ifneq ($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
ifeq ($(RETPOLINE_CFLAGS),)
$(error You are building kernel with non-retpoline compiler, please update your compiler.)
endif
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif
archscripts: scripts_basic


@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
if (config == -1LL)
return -EINVAL;
/*
* Branch tracing:
*/
if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
!attr->freq && hwc->sample_period == 1) {
/* BTS is not supported by this architecture. */
if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
/* disallow bts if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
event->destroy = hw_perf_lbr_event_destroy;
}
hwc->config |= config;
return 0;


@@ -2345,16 +2345,7 @@ done:
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
unsigned int hw_event, bts_event;
if (event->attr.freq)
return NULL;
hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
if (unlikely(intel_pmu_has_bts(event)))
return &bts_constraint;
return NULL;
@@ -2973,10 +2964,47 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
return flags;
}
static int intel_pmu_bts_config(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
if (unlikely(intel_pmu_has_bts(event))) {
/* BTS is not supported by this architecture. */
if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
/* disallow bts if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
event->destroy = hw_perf_lbr_event_destroy;
}
return 0;
}
static int core_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
if (ret)
return ret;
return intel_pmu_bts_config(event);
}
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
if (ret)
return ret;
ret = intel_pmu_bts_config(event);
if (ret)
return ret;
@@ -2999,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
/*
* BTS is set up earlier in this path, so don't account twice
*/
if (!intel_pmu_has_bts(event)) {
if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
@@ -3462,7 +3490,7 @@ static __initconst const struct x86_pmu core_pmu = {
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
.hw_config = x86_pmu_hw_config,
.hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,


@@ -850,11 +850,16 @@ static inline int amd_pmu_init(void)
static inline bool intel_pmu_has_bts(struct perf_event *event)
{
if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
!event->attr.freq && event->hw.sample_period == 1)
return true;
struct hw_perf_event *hwc = &event->hw;
unsigned int hw_event, bts_event;
return false;
if (event->attr.freq)
return false;
hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
return hw_event == bts_event && hwc->sample_period == 1;
}
int intel_pmu_save_and_restart(struct perf_event *event);


@@ -284,7 +284,9 @@
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */


@@ -41,9 +41,10 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */


@@ -3,6 +3,8 @@
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
@@ -162,29 +164,35 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64
/*
* Since the inline asm uses the %V modifier which is only in newer GCC,
* the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
* Inline asm uses the %V modifier which is only in newer GCC
* which is ensured when CONFIG_RETPOLINE is defined.
*/
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE( \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE)
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
#else /* CONFIG_X86_32 */
/*
* For i386 we use the original ret-equivalent retpoline, because
* otherwise we'll run out of registers. We don't care about CET
* here, anyway.
*/
# define CALL_NOSPEC \
ALTERNATIVE( \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
" jmp 904f;\n" \
@@ -199,9 +207,14 @@
" ret;\n" \
" .align 16\n" \
"904: call 901b;\n", \
X86_FEATURE_RETPOLINE)
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
@@ -210,14 +223,19 @@
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
SPECTRE_V2_NONE,
SPECTRE_V2_RETPOLINE_MINIMAL,
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
SPECTRE_V2_IBRS,
SPECTRE_V2_IBRS_ENHANCED,
};
/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
SPECTRE_V2_USER_NONE,
SPECTRE_V2_USER_STRICT,
SPECTRE_V2_USER_PRCTL,
SPECTRE_V2_USER_SECCOMP,
};
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
@@ -295,6 +313,10 @@ do { \
preempt_enable(); \
} while (0)
DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
#endif /* __ASSEMBLY__ */
/*


@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
{
BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}
static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
{
BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}
static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
static inline void speculative_store_bypass_ht_init(void) { }
#endif
extern void speculative_store_bypass_update(unsigned long tif);
static inline void speculative_store_bypass_update_current(void)
{
speculative_store_bypass_update(current_thread_info()->flags);
}
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);
#endif


@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
/* This runs runs on the previous thread's stack. */
static inline void prepare_switch_to(struct task_struct *prev,


@@ -81,10 +81,12 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -112,6 +114,8 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -147,8 +151,18 @@ struct thread_info {
_TIF_FSCHECK)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_BASE \
(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
*/
#ifdef CONFIG_SMP
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
#else
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)


@@ -185,10 +185,14 @@ struct tlb_state {
#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
/* Last user mm for optimizing IBPB */
union {
struct mm_struct *last_user_mm;
unsigned long last_user_mm_ibpb;
};
u16 loaded_mm_asid;
u16 next_asid;
/* last user mm's ctx id */
u64 last_ctx_id;
/*
* We can be in one of several states:


@@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
nodes_per_socket = ((value >> 3) & 7) + 1;
}
if (c->x86 >= 0x15 && c->x86 <= 0x17) {
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
!boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
c->x86 >= 0x15 && c->x86 <= 0x17) {
unsigned int bit;
switch (c->x86) {


@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -34,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
/*
* Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
* writes to SPEC_CTRL contain whatever reserved bits have been set.
*/
u64 __ro_after_init x86_spec_ctrl_base;
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
* The vendor and possibly platform specific bits which can be modified in
@@ -54,6 +53,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
/* Control conditional STIPB in switch_to() */
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
void __init check_bugs(void)
{
identify_boot_cpu();
@@ -124,31 +130,6 @@ void __init check_bugs(void)
#endif
}
/* The kernel command line selection */
enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_NONE,
SPECTRE_V2_CMD_AUTO,
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
SPECTRE_V2_CMD_RETPOLINE_AMD,
};
static const char *spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
[SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
@@ -166,9 +147,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
/* Conditional STIBP enabled? */
if (static_branch_unlikely(&switch_to_cond_stibp))
hostval |= stibp_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -202,7 +188,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
ssbd_spec_ctrl_to_tif(hostval);
speculative_store_bypass_update(tif);
speculation_ctrl_update(tif);
}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@@ -217,6 +203,15 @@ static void x86_amd_ssb_disable(void)
wrmsrl(MSR_AMD64_LS_CFG, msrval);
}
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
SPECTRE_V2_USER_NONE;
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -238,23 +233,6 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
static void __init spec2_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
pr_info("%s selected on command line.\n", reason);
}
static void __init spec2_print_if_secure(const char *reason)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
pr_info("%s selected on command line.\n", reason);
}
static inline bool retp_compiler(void)
{
return __is_defined(RETPOLINE);
}
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
@@ -262,43 +240,210 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
return len == arglen && !strncmp(arg, opt, len);
}
/* The kernel command line selection for spectre v2 */
enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_NONE,
SPECTRE_V2_CMD_AUTO,
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
SPECTRE_V2_CMD_RETPOLINE_AMD,
};
enum spectre_v2_user_cmd {
SPECTRE_V2_USER_CMD_NONE,
SPECTRE_V2_USER_CMD_AUTO,
SPECTRE_V2_USER_CMD_FORCE,
SPECTRE_V2_USER_CMD_PRCTL,
SPECTRE_V2_USER_CMD_PRCTL_IBPB,
SPECTRE_V2_USER_CMD_SECCOMP,
SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
};
static const char * const spectre_v2_user_strings[] = {
[SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
[SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
[SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
[SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};
static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
} v2_user_options[] __initdata = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
{ "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
{ "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
{ "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
{ "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
};
static void __init spec_v2_user_print_cond(const char *reason, bool secure)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
{
char arg[20];
int ret, i;
switch (v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
return SPECTRE_V2_USER_CMD_FORCE;
default:
break;
}
ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_USER_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
if (match_option(arg, ret, v2_user_options[i].option)) {
spec_v2_user_print_cond(v2_user_options[i].option,
v2_user_options[i].secure);
return v2_user_options[i].cmd;
}
}
pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_USER_CMD_AUTO;
}
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
enum spectre_v2_user_cmd cmd;
if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
return;
if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
cmd = spectre_v2_parse_user_cmdline(v2_cmd);
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
case SPECTRE_V2_USER_CMD_FORCE:
mode = SPECTRE_V2_USER_STRICT;
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
mode = SPECTRE_V2_USER_PRCTL;
break;
case SPECTRE_V2_USER_CMD_AUTO:
case SPECTRE_V2_USER_CMD_SECCOMP:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
if (IS_ENABLED(CONFIG_SECCOMP))
mode = SPECTRE_V2_USER_SECCOMP;
else
mode = SPECTRE_V2_USER_PRCTL;
break;
}
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
switch (cmd) {
case SPECTRE_V2_USER_CMD_FORCE:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
static_branch_enable(&switch_mm_always_ibpb);
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_AUTO:
case SPECTRE_V2_USER_CMD_SECCOMP:
static_branch_enable(&switch_mm_cond_ibpb);
break;
default:
break;
}
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
}
/* If enhanced IBRS is enabled no STIPB required */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
* If SMT is not possible or STIBP is not available clear the STIPB
* mode.
*/
if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
set_mode:
spectre_v2_user = mode;
/* Only print the STIBP mode when SMT possible */
if (smt_possible)
pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
[SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};
static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
} mitigation_options[] = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
} mitigation_options[] __initdata = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
{
if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
pr_info("%s selected on command line.\n", reason);
}
static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
char arg[20];
int ret, i;
enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
return SPECTRE_V2_CMD_NONE;
else {
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
if (!match_option(arg, ret, mitigation_options[i].option))
continue;
cmd = mitigation_options[i].cmd;
break;
}
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
if (ret < 0)
return SPECTRE_V2_CMD_AUTO;
if (i >= ARRAY_SIZE(mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
if (!match_option(arg, ret, mitigation_options[i].option))
continue;
cmd = mitigation_options[i].cmd;
break;
}
if (i >= ARRAY_SIZE(mitigation_options)) {
pr_err("unknown option (%s). Switching to AUTO select\n", arg);
return SPECTRE_V2_CMD_AUTO;
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@@ -315,11 +460,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
if (mitigation_options[i].secure)
spec2_print_if_secure(mitigation_options[i].option);
else
spec2_print_if_insecure(mitigation_options[i].option);
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
}
@@ -375,14 +517,12 @@ retpoline_auto:
pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
goto retpoline_generic;
}
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
mode = SPECTRE_V2_RETPOLINE_AMD;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
} else {
retpoline_generic:
mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
SPECTRE_V2_RETPOLINE_MINIMAL;
mode = SPECTRE_V2_RETPOLINE_GENERIC;
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
@@ -401,12 +541,6 @@ specv2_set_mode:
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
/* Initialize Indirect Branch Prediction Barrier if supported */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
/*
* Retpoline means the kernel is safe because it has no indirect
* branches. Enhanced IBRS protects firmware too, so, enable restricted
@@ -422,6 +556,66 @@ specv2_set_mode:
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
spectre_v2_user_select_mitigation(cmd);
/* Enable STIBP if appropriate */
arch_smt_update();
}
static void update_stibp_msr(void * __unused)
{
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
static void update_stibp_strict(void)
{
u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
if (sched_smt_active())
mask |= SPEC_CTRL_STIBP;
if (mask == x86_spec_ctrl_base)
return;
pr_info("Update user space SMT mitigation: STIBP %s\n",
mask & SPEC_CTRL_STIBP ? "always-on" : "off");
x86_spec_ctrl_base = mask;
on_each_cpu(update_stibp_msr, NULL, 1);
}
/* Update the static key controlling the evaluation of TIF_SPEC_IB */
static void update_indir_branch_cond(void)
{
if (sched_smt_active())
static_branch_enable(&switch_to_cond_stibp);
else
static_branch_disable(&switch_to_cond_stibp);
}
void arch_smt_update(void)
{
/* Enhanced IBRS implies STIBP. No update required. */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
mutex_lock(&spec_ctrl_mutex);
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
update_stibp_strict();
break;
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
update_indir_branch_cond();
break;
}
mutex_unlock(&spec_ctrl_mutex);
}
#undef pr_fmt
@@ -438,7 +632,7 @@ enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_SECCOMP,
};
static const char *ssb_strings[] = {
static const char * const ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
@@ -448,7 +642,7 @@ static const char *ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
} ssb_mitigation_options[] = {
} ssb_mitigation_options[] __initdata = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -532,18 +726,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
* a completely different MSR and bit dependent on family.
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
* use a completely different MSR and bit dependent on family.
*/
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
!static_cpu_has(X86_FEATURE_AMD_SSBD)) {
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
break;
case X86_VENDOR_AMD:
x86_amd_ssb_disable();
break;
}
}
@@ -561,10 +753,25 @@ static void ssb_select_mitigation(void)
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt
static void task_update_spec_tif(struct task_struct *tsk)
{
/* Force the update of the real TIF bits */
set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
/*
* Immediately update the speculation control MSRs for the current
* task, but for a non-current task delay setting the CPU
* mitigation until it is scheduled next.
*
* This can only happen for SECCOMP mitigation. For PRCTL it's
* always the current task.
*/
if (tsk == current)
speculation_ctrl_update_current();
}
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
bool update;
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
return -ENXIO;
@@ -575,28 +782,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
if (task_spec_ssb_force_disable(task))
return -EPERM;
task_clear_spec_ssb_disable(task);
update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
task_set_spec_ssb_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
case PR_SPEC_FORCE_DISABLE:
task_set_spec_ssb_disable(task);
task_set_spec_ssb_force_disable(task);
update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
return 0;
}
/*
* If being set on non-current task, delay setting the CPU
* mitigation until it is next scheduled.
*/
if (task == current && update)
speculative_store_bypass_update_current();
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
if (spectre_v2_user == SPECTRE_V2_USER_NONE)
return 0;
/*
* Indirect branch speculation is always disabled in strict
* mode.
*/
if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
case PR_SPEC_DISABLE:
case PR_SPEC_FORCE_DISABLE:
/*
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
if (spectre_v2_user == SPECTRE_V2_USER_NONE)
return -EPERM;
if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
task_update_spec_tif(task);
break;
default:
return -ERANGE;
}
return 0;
}
@@ -606,6 +841,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -616,6 +853,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -638,11 +877,35 @@ static int ssb_prctl_get(struct task_struct *task)
}
}
static int ib_prctl_get(struct task_struct *task)
{
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
return PR_SPEC_ENABLE;
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
case SPECTRE_V2_USER_STRICT:
return PR_SPEC_DISABLE;
default:
return PR_SPEC_NOT_AFFECTED;
}
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_get(task);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_get(task);
default:
return -ENODEV;
}
@@ -780,7 +1043,7 @@ early_param("l1tf", l1tf_cmdline);
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
#if IS_ENABLED(CONFIG_KVM_INTEL)
static const char *l1tf_vmx_states[] = {
static const char * const l1tf_vmx_states[] = {
[VMENTER_L1D_FLUSH_AUTO] = "auto",
[VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
[VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
@@ -796,13 +1059,14 @@ static ssize_t l1tf_show_state(char *buf)
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
cpu_smt_control == CPU_SMT_ENABLED))
sched_smt_active())) {
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
}
return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
sched_smt_active() ? "vulnerable" : "disabled");
}
#else
static ssize_t l1tf_show_state(char *buf)
@@ -811,6 +1075,36 @@ static ssize_t l1tf_show_state(char *buf)
}
#endif
static char *stibp_state(void)
{
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
switch (spectre_v2_user) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
return ", STIBP: forced";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
return ", STIBP: conditional";
}
return "";
}
static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
return ", IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
return ", IBPB: conditional";
return ", IBPB: disabled";
}
return "";
}
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -828,9 +1122,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "Mitigation: __user pointer sanitization\n");
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
spectre_v2_module_string());
case X86_BUG_SPEC_STORE_BYPASS:
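
[For reference, illustrative only and not from the patch itself: the ", IBPB: ..." and ", STIBP: ..." fragments built by ibpb_state() and stibp_state() above are exposed through the spectre_v2 entry under /sys/devices/system/cpu/vulnerabilities/, the standard sysfs location for these strings, so the combined state can be checked with a reader as small as:]

/* Sketch: print the Spectre v2 summary assembled by cpu_show_common(). */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

	if (!f) {
		perror("spectre_v2");
		return 1;
	}
	/* Example output with this series applied (hardware and command line
	 * dependent): "Mitigation: Full generic retpoline, IBPB: conditional,
	 * IBRS_FW, STIBP: conditional, RSB filling"
	 */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}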


@@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_STIBP);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
set_cpu_cap(c, X86_FEATURE_SSBD);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
}
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -958,7 +964,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
!(ia32_cap & ARCH_CAP_SSB_NO))
!(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (x86_match_cpu(cpu_no_speculation))


@@ -56,7 +56,7 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
static bool thresholding_en;
static bool thresholding_irq_en;
static const char * const th_names[] = {
"load_store",
@@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
mce_threshold_vector = amd_threshold_interrupt;
if (offset == new)
thresholding_irq_en = true;
done:
mce_threshold_block_init(&b, offset);
@@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
if (!thresholding_en)
return 0;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu)
struct threshold_bank **bp;
int err = 0;
if (!thresholding_en)
return 0;
bp = per_cpu(threshold_banks, cpu);
if (bp)
return 0;
@@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void)
{
unsigned lcpu = 0;
if (mce_threshold_vector == amd_threshold_interrupt)
thresholding_en = true;
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
int err = mce_threshold_create_device(lcpu);
@@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void)
return err;
}
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
return 0;
}
/*


@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
local_bh_disable();
fpu->initialized = 1;
preempt_disable();
fpu__restore(fpu);
preempt_enable();
local_bh_enable();
return err;
} else {


@@ -41,6 +41,8 @@
#include <asm/prctl.h>
#include <asm/spec-ctrl.h>
#include "process.h"
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -255,11 +257,12 @@ void arch_setup_new_exec(void)
enable_cpuid();
}
static inline void switch_to_bitmap(struct tss_struct *tss,
struct thread_struct *prev,
static inline void switch_to_bitmap(struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
@@ -398,32 +401,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
static __always_inline void intel_set_ssb_state(unsigned long tifn)
/*
* Update the MSRs managing speculation control, during context switch.
*
* tifp: Previous task's thread flags
* tifn: Next task's thread flags
*/
static __always_inline void __speculation_ctrl_update(unsigned long tifp,
unsigned long tifn)
{
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
unsigned long tif_diff = tifp ^ tifn;
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanentely
* enabled this branch can't be taken because nothing can set
* TIF_SSBD.
*/
if (tif_diff & _TIF_SSBD) {
if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
amd_set_ssb_virt_state(tifn);
} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
amd_set_core_ssb_state(tifn);
} else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
static_cpu_has(X86_FEATURE_AMD_SSBD)) {
msr |= ssbd_tif_to_spec_ctrl(tifn);
updmsr = true;
}
}
/*
* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
* otherwise avoid the MSR write.
*/
if (IS_ENABLED(CONFIG_SMP) &&
static_branch_unlikely(&switch_to_cond_stibp)) {
updmsr |= !!(tif_diff & _TIF_SPEC_IB);
msr |= stibp_tif_to_spec_ctrl(tifn);
}
if (updmsr)
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
amd_set_ssb_virt_state(tifn);
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
amd_set_core_ssb_state(tifn);
else
intel_set_ssb_state(tifn);
if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
if (task_spec_ssb_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SSBD);
else
clear_tsk_thread_flag(tsk, TIF_SSBD);
if (task_spec_ib_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SPEC_IB);
else
clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
}
/* Return the updated threadinfo flags*/
return task_thread_info(tsk)->flags;
}
void speculative_store_bypass_update(unsigned long tif)
void speculation_ctrl_update(unsigned long tif)
{
/* Forced update. Make sure all relevant TIF flags are different */
preempt_disable();
__speculative_store_bypass_update(tif);
__speculation_ctrl_update(~tif, tif);
preempt_enable();
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
/* Called from seccomp/prctl update */
void speculation_ctrl_update_current(void)
{
preempt_disable();
speculation_ctrl_update(speculation_ctrl_update_tif(current));
preempt_enable();
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
@@ -433,7 +489,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
switch_to_bitmap(prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
@@ -454,8 +510,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
if ((tifp ^ tifn) & _TIF_SSBD)
__speculative_store_bypass_update(tifn);
if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
__speculation_ctrl_update(tifp, tifn);
} else {
speculation_ctrl_update_tif(prev_p);
tifn = speculation_ctrl_update_tif(next_p);
/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
}
/*

arch/x86/kernel/process.h (new file, 39 lines)

@@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0
//
// Code shared between 32 and 64 bit
#include <asm/spec-ctrl.h>
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
/*
* This needs to be inline to optimize for the common case where no extra
* work needs to be done.
*/
static inline void switch_to_extra(struct task_struct *prev,
struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long prev_tif = task_thread_info(prev)->flags;
if (IS_ENABLED(CONFIG_SMP)) {
/*
* Avoid __switch_to_xtra() invocation when conditional
* STIPB is disabled and the only different bit is
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
* in the TIF_WORK_CTXSW masks.
*/
if (!static_branch_likely(&switch_to_cond_stibp)) {
prev_tif &= ~_TIF_SPEC_IB;
next_tif &= ~_TIF_SPEC_IB;
}
}
/*
* __switch_to_xtra() handles debug registers, i/o bitmaps,
* speculation mitigations etc.
*/
if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
prev_tif & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev, next);
}


@@ -59,6 +59,8 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
#include "process.h"
void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);
/*
* Now maybe handle debug registers and/or IO bitmaps
*/
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
switch_to_extra(prev_p, next_p);
/*
* Leave lazy mode, flushing any hypercalls made here.


@@ -59,6 +59,8 @@
#include <asm/unistd_32_ia32.h>
#endif
#include "process.h"
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
@@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload sp0. */
update_sp0(next_p);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/
if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
__switch_to_xtra(prev_p, next_p);
#ifdef CONFIG_XEN_PV
/*


@@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
/*
* The preference is to use SPEC CTRL MSR instead of the
* VIRT_SPEC MSR.
*/
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(VIRT_SSBD);
break;
}


@@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new)
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
const u8 *new, int *bytes)
int *bytes)
{
u64 gentry;
u64 gentry = 0;
int r;
/*
@@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
if (r)
gentry = 0;
new = (const u8 *)&gentry;
}
switch (*bytes) {
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
if (*bytes == 4 || *bytes == 8) {
r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
if (r)
gentry = 0;
}
return gentry;
@@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
/*
* No need to care whether memory allocation is successful
* or not since pte prefetch is skipped if it does not have
@@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_topup_memory_caches(vcpu);
spin_lock(&vcpu->kvm->mmu_lock);
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);


@@ -1733,21 +1733,31 @@ out:
return ERR_PTR(err);
}
static void svm_clear_current_vmcb(struct vmcb *vmcb)
{
int i;
for_each_online_cpu(i)
cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
}
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
/*
* The vmcb page can be recycled, causing a false negative in
* svm_vcpu_load(). So, ensure that no logical CPU has this
* vmcb page recorded as its current vmcb.
*/
svm_clear_current_vmcb(svm->vmcb);
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
/*
* The vmcb page can be recycled, causing a false negative in
* svm_vcpu_load(). So do a full IBPB now.
*/
indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -3644,7 +3654,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
msr_info->data = svm->spec_ctrl;
@@ -3749,11 +3760,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
svm->spec_ctrl = data;


@@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
clock_pairing.nsec = ts.tv_nsec;
clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
clock_pairing.flags = 0;
memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
ret = 0;
if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -6884,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
else {
if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
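On the clock_pairing hunk above: the structure written to the guest has explicit padding, and without the added memset() those bytes keep whatever was on the kernel stack and are copied out as-is. A minimal userspace sketch of the pattern, with an invented struct layout standing in for the real one:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Invented layout; the point is the explicit padding member. */
struct clock_pairing_example {
        uint64_t sec;
        uint64_t nsec;
        uint64_t tsc;
        uint32_t flags;
        uint32_t pad[9];
};

int main(void)
{
        struct clock_pairing_example cp;
        const unsigned char *p = (const unsigned char *)cp.pad;
        size_t i;
        int dirty = 0;

        cp.sec   = 1;
        cp.nsec  = 2;
        cp.tsc   = 3;
        cp.flags = 0;
        /* Without this, pad[] keeps stale stack bytes and they would be
         * copied out verbatim by the write-to-guest step below. */
        memset(&cp.pad, 0, sizeof(cp.pad));

        for (i = 0; i < sizeof(cp.pad); i++)
                dirty |= p[i];
        /* Stands in for kvm_write_guest(): only fully initialized data
         * should ever leave the kernel. */
        printf("padding is %s\n", dirty ? "dirty" : "clean");
        return 0;
}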


@@ -29,6 +29,12 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
/*
* Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_ibpb.
*/
#define LAST_USER_MM_IBPB 0x1UL
/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
}
}
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
return (unsigned long)next->mm | ibpb;
}
static void cond_ibpb(struct task_struct *next)
{
if (!next || !next->mm)
return;
/*
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
* opens a hypothetical hole vs. mm_struct reuse, which is more or
* less impossible to control by an attacker. Aside of that it
* would only affect the first schedule so the theoretically
* exposed data is not really interesting.
*/
if (static_branch_likely(&switch_mm_cond_ibpb)) {
unsigned long prev_mm, next_mm;
/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
*
* 1) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB set to a user space task
* (potential victim) which has TIF_SPEC_IB not set.
*
* 2) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB not set to a user space task
* (potential victim) which has TIF_SPEC_IB set.
*
* This could be done by unconditionally issuing IBPB when
* a task which has TIF_SPEC_IB set is either scheduled in
* or out. Though that results in two flushes when:
*
* - the same user space task is scheduled out and later
* scheduled in again and only a kernel thread ran in
* between.
*
* - a user space task belonging to the same process is
* scheduled in after a kernel thread ran in between
*
* - a user space task belonging to the same process is
* scheduled in immediately.
*
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
* cpu_tlbstate.last_user_mm_ibpb for comparison.
*/
next_mm = mm_mangle_tif_spec_ib(next);
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
/*
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}
if (static_branch_unlikely(&switch_mm_always_ibpb)) {
/*
* Only flush when switching to a user space task with a
* different context than the user space task which ran
* last on this CPU.
*/
if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
}
}
}
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -248,27 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
} else {
u16 new_asid;
bool need_flush;
u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
*
* As an optimization, flush indirect branches only when
* switching into processes that disable dumping. This
* protects high value processes like gpg, without having
* too high performance overhead. IBPB is *expensive*!
*
* This will not flush branches when switching into kernel
* threads. It will also not flush if we switch to idle
* thread and back to the same process. It will flush if we
* switch to a different non-dumpable process.
*/
if (tsk && tsk->mm &&
tsk->mm->context.ctx_id != last_ctx_id &&
get_dumpable(tsk->mm) != SUID_DUMP_USER)
indirect_branch_prediction_barrier();
cond_ibpb(tsk);
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -318,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
/*
* Record last user mm's context id, so we can avoid
* flushing branch buffer with IBPB if we switch back
* to the same user.
*/
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
/* Make sure we write CR3 before loaded_mm. */
barrier();
@@ -406,7 +473,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
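A minimal userspace sketch of the mm_mangle_tif_spec_ib()/cond_ibpb() comparison introduced above; the bit position is illustrative and plain local variables stand in for mm_struct pointers and the per-CPU cpu_tlbstate state:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative values; the real code lives in arch/x86/mm/tlb.c. */
#define TIF_SPEC_IB             9
#define LAST_USER_MM_IBPB       0x1UL

/* Fold the task's TIF_SPEC_IB bit into bit 0 of its mm pointer. */
static uintptr_t mangle_mm(const void *mm, unsigned long tif_flags)
{
        unsigned long ibpb = (tif_flags >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

        return (uintptr_t)mm | ibpb;
}

/* IBPB is needed only if the mms differ and either side has the bit set. */
static bool needs_ibpb(uintptr_t prev_mm, uintptr_t next_mm)
{
        return next_mm != prev_mm && ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
        long mm_a, mm_b;                        /* stand-ins for mm_structs */
        uintptr_t last = mangle_mm(&mm_a, 0);   /* what cpu_tlbstate remembers */

        uintptr_t same_task  = mangle_mm(&mm_a, 0);
        uintptr_t other_task = mangle_mm(&mm_b, 1UL << TIF_SPEC_IB);

        printf("same mm, IB clear -> ibpb=%d\n", needs_ibpb(last, same_task));  /* 0 */
        printf("new mm, IB set    -> ibpb=%d\n", needs_ibpb(last, other_task)); /* 1 */
        return 0;
}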


@@ -91,14 +91,14 @@ int main(void)
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
#endif
DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));


@@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti)
void coprocessor_flush_all(struct thread_info *ti)
{
unsigned long cpenable;
unsigned long cpenable, old_cpenable;
int i;
preempt_disable();
RSR_CPENABLE(old_cpenable);
cpenable = ti->cpenable;
WSR_CPENABLE(cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
}
WSR_CPENABLE(old_cpenable);
preempt_enable();
}


@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
}
#if XTENSA_HAVE_COPROCESSORS
#define CP_OFFSETS(cp) \
{ \
.elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
.ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
.sz = sizeof(xtregs_ ## cp ## _t), \
}
static const struct {
size_t elf_xtregs_offset;
size_t ti_offset;
size_t sz;
} cp_offsets[] = {
CP_OFFSETS(cp0),
CP_OFFSETS(cp1),
CP_OFFSETS(cp2),
CP_OFFSETS(cp3),
CP_OFFSETS(cp4),
CP_OFFSETS(cp5),
CP_OFFSETS(cp6),
CP_OFFSETS(cp7),
};
#endif
static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
{
struct pt_regs *regs = task_pt_regs(child);
struct thread_info *ti = task_thread_info(child);
elf_xtregs_t __user *xtregs = uregs;
int ret = 0;
int i __maybe_unused;
if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessor registers to memory. */
coprocessor_flush_all(ti);
ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
sizeof(xtregs_coprocessor_t));
for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
ret |= __copy_to_user((char __user *)xtregs +
cp_offsets[i].elf_xtregs_offset,
(const char *)ti +
cp_offsets[i].ti_offset,
cp_offsets[i].sz);
#endif
ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
struct pt_regs *regs = task_pt_regs(child);
elf_xtregs_t *xtregs = uregs;
int ret = 0;
int i __maybe_unused;
if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
coprocessor_flush_all(ti);
coprocessor_release_all(ti);
ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
sizeof(xtregs_coprocessor_t));
for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
(const char __user *)xtregs +
cp_offsets[i].elf_xtregs_offset,
cp_offsets[i].sz);
#endif
ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
sizeof(xtregs->opt));
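The cp_offsets[] table above replaces one bulk copy of xtregs_cp with per-coprocessor copies, pairing an offset in elf_xtregs_t with an offset in struct thread_info. A minimal userspace sketch of that table-driven copy, with invented structures in place of the xtensa types:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Two invented layouts that carry the same logical fields at different
 * offsets, like elf_xtregs_t vs. struct thread_info. */
struct elf_regs    { long opt; long cp0; long cp1; };
struct thread_regs { long cp0; long unrelated; long cp1; };

#define CP_OFFSETS(cp)                                  \
        { offsetof(struct elf_regs, cp),                \
          offsetof(struct thread_regs, cp),             \
          sizeof(((struct thread_regs *)0)->cp) }

static const struct { size_t elf_off, ti_off, sz; } cp_offsets[] = {
        CP_OFFSETS(cp0),
        CP_OFFSETS(cp1),
};

int main(void)
{
        struct thread_regs ti = { .cp0 = 10, .unrelated = -1, .cp1 = 20 };
        struct elf_regs elf;
        size_t i;

        memset(&elf, 0, sizeof(elf));
        /* Copy each region individually instead of one big memcpy(), so
         * fields that are not laid out identically in both structures
         * still end up in the right place. */
        for (i = 0; i < sizeof(cp_offsets) / sizeof(cp_offsets[0]); i++)
                memcpy((char *)&elf + cp_offsets[i].elf_off,
                       (char *)&ti + cp_offsets[i].ti_off,
                       cp_offsets[i].sz);

        printf("cp0=%ld cp1=%ld\n", elf.cp0, elf.cp1);  /* cp0=10 cp1=20 */
        return 0;
}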


@@ -3136,7 +3136,6 @@ static void binder_transaction(struct binder_proc *proc,
t->buffer = NULL;
goto err_binder_alloc_buf_failed;
}
t->buffer->allow_user_free = 0;
t->buffer->debug_id = t->debug_id;
t->buffer->transaction = t;
t->buffer->target_node = target_node;
@@ -3632,14 +3631,18 @@ static int binder_thread_write(struct binder_proc *proc,
buffer = binder_alloc_prepare_to_free(&proc->alloc,
data_ptr);
if (buffer == NULL) {
binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
proc->pid, thread->pid, (u64)data_ptr);
break;
}
if (!buffer->allow_user_free) {
binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
proc->pid, thread->pid, (u64)data_ptr);
if (IS_ERR_OR_NULL(buffer)) {
if (PTR_ERR(buffer) == -EPERM) {
binder_user_error(
"%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
proc->pid, thread->pid,
(u64)data_ptr);
} else {
binder_user_error(
"%d:%d BC_FREE_BUFFER u%016llx no match\n",
proc->pid, thread->pid,
(u64)data_ptr);
}
break;
}
binder_debug(BINDER_DEBUG_FREE_BUFFER,


@@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
else {
/*
* Guard against user threads attempting to
* free the buffer twice
* free the buffer when in use by kernel or
* after it's already been freed.
*/
if (buffer->free_in_progress) {
pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
alloc->pid, current->pid, (u64)user_ptr);
return NULL;
}
buffer->free_in_progress = 1;
if (!buffer->allow_user_free)
return ERR_PTR(-EPERM);
buffer->allow_user_free = 0;
return buffer;
}
}
@@ -490,7 +488,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
rb_erase(best_fit, &alloc->free_buffers);
buffer->free = 0;
buffer->free_in_progress = 0;
buffer->allow_user_free = 0;
binder_insert_allocated_buffer_locked(alloc, buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
"%d: binder_alloc_buf size %zd got %pK\n",

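With this change binder_alloc_prepare_to_free_locked() distinguishes "no such buffer" (NULL) from "buffer exists but may not be freed by userspace" (ERR_PTR(-EPERM)), and binder_thread_write() checks the result with IS_ERR_OR_NULL()/PTR_ERR(). A minimal userspace sketch of that error-pointer idiom; the helpers below are simplified stand-ins for the kernel's err.h macros, and the buffer structure is invented:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's include/linux/err.h helpers. */
#define MAX_ERRNO 4095
static void *ERR_PTR(long err)      { return (void *)err; }
static long  PTR_ERR(const void *p) { return (long)p; }
static int   IS_ERR_OR_NULL(const void *p)
{
        return !p || (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

struct buffer { int allow_user_free; };

/* NULL: no such buffer.  ERR_PTR(-EPERM): found, but not user-freeable. */
static struct buffer *prepare_to_free(struct buffer *b)
{
        if (!b)
                return NULL;
        if (!b->allow_user_free)
                return ERR_PTR(-EPERM);
        b->allow_user_free = 0;
        return b;
}

int main(void)
{
        struct buffer busy = { .allow_user_free = 0 };
        struct buffer *r = prepare_to_free(&busy);

        if (IS_ERR_OR_NULL(r)) {
                if (PTR_ERR(r) == -EPERM)
                        printf("matched unreturned or currently freeing buffer\n");
                else
                        printf("no match\n");
        }
        return 0;
}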

@@ -50,8 +50,7 @@ struct binder_buffer {
unsigned free:1;
unsigned allow_user_free:1;
unsigned async_transaction:1;
unsigned free_in_progress:1;
unsigned debug_id:28;
unsigned debug_id:29;
struct binder_transaction *transaction;


@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
atchan->descs_allocated = 0;
atchan->status = 0;
/*
* Free atslave allocated in at_dma_xlate()
*/
kfree(chan->private);
chan->private = NULL;
dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
}
@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
if (!atslave)
return NULL;
@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
struct resource *io;
at_dma_off(atdma);
if (pdev->dev.of_node)
of_dma_controller_free(pdev->dev.of_node);
dma_async_device_unregister(&atdma->dma_common);
dma_pool_destroy(atdma->memset_pool);


@@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
}
wait_for_completion(&msginfo->waitevent);
if (msginfo->response.gpadl_created.creation_status != 0) {
pr_err("Failed to establish GPADL: err = 0x%x\n",
msginfo->response.gpadl_created.creation_status);
ret = -EDQUOT;
goto cleanup;
}
if (channel->rescind) {
ret = -ENODEV;
goto cleanup;


@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
return st_sensors_set_dataready_irq(indio_dev, state);
}
static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
{
return st_sensors_set_enable(indio_dev, true);
}
static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
{
int err;
@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
if (err < 0)
goto st_magn_buffer_postenable_error;
return err;
return st_sensors_set_enable(indio_dev, true);
st_magn_buffer_postenable_error:
kfree(mdata->buffer_data);
@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
int err;
struct st_sensor_data *mdata = iio_priv(indio_dev);
err = iio_triggered_buffer_predisable(indio_dev);
err = st_sensors_set_enable(indio_dev, false);
if (err < 0)
goto st_magn_buffer_predisable_error;
err = st_sensors_set_enable(indio_dev, false);
err = iio_triggered_buffer_predisable(indio_dev);
st_magn_buffer_predisable_error:
kfree(mdata->buffer_data);
@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error:
}
static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
.preenable = &st_magn_buffer_preenable,
.postenable = &st_magn_buffer_postenable,
.predisable = &st_magn_buffer_predisable,
};


@@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev)
}
}
em28xx_unregister_dvb(dvb);
/* remove I2C SEC */
client = dvb->i2c_client_sec;
if (client) {
@@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev)
i2c_unregister_device(client);
}
em28xx_unregister_dvb(dvb);
kfree(dvb);
dev->dvb = NULL;
kref_put(&dev->ref, em28xx_free_device);


@@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
if (err)
goto error_window;
err = scif_map_page(&window->num_pages_lookup.lookup[j],
vmalloc_dma_phys ?
vmalloc_num_pages ?
vmalloc_to_page(&window->num_pages[i]) :
virt_to_page(&window->num_pages[i]),
remote_dev);


@@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi,
vol->ubi = ubi;
reserved_pebs += vol->reserved_pebs;
/*
* We use ubi->peb_count and not vol->reserved_pebs because
* we want to keep the code simple. Otherwise we'd have to
* resize/check the bitmap upon volume resize too.
* Allocating a few bytes more does not hurt.
*/
err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
if (err)
return err;
/*
* In case of dynamic volume UBI knows nothing about how much
* data is stored there. So assume the whole volume is used.
@@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi,
(long long)(vol->used_ebs - 1) * vol->usable_leb_size;
vol->used_bytes += av->last_data_size;
vol->last_eb_bytes = av->last_data_size;
/*
* We use ubi->peb_count and not vol->reserved_pebs because
* we want to keep the code simple. Otherwise we'd have to
* resize/check the bitmap upon volume resize too.
* Allocating a few bytes more does not hurt.
*/
err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
if (err)
return err;
}
/* And add the layout volume */


@@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
bool if_up = netif_running(nic->netdev);
struct bpf_prog *old_prog;
bool bpf_attached = false;
int ret = 0;
/* For now just support only the usual MTU sized frames */
if (prog && (dev->mtu > 1500)) {
@@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
if (nic->xdp_prog) {
/* Attach BPF program */
nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
if (!IS_ERR(nic->xdp_prog))
if (!IS_ERR(nic->xdp_prog)) {
bpf_attached = true;
} else {
ret = PTR_ERR(nic->xdp_prog);
nic->xdp_prog = NULL;
}
}
/* Calculate Tx queues needed for XDP and network stack */
@@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
netif_trans_update(nic->netdev);
}
return 0;
return ret;
}
static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)


@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
if (!sq->dmem.base)
return;
if (sq->tso_hdrs)
if (sq->tso_hdrs) {
dma_free_coherent(&nic->pdev->dev,
sq->dmem.q_len * TSO_HEADER_SIZE,
sq->tso_hdrs, sq->tso_hdrs_phys);
sq->tso_hdrs = NULL;
}
/* Free pending skbs in the queue */
smp_rmb();


@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
* it just report sending a packet to the target
* (without actual packet transfer).
*/
dev_kfree_skb_any(skb);
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += skb->len;
dev_kfree_skb_any(skb);
}
}


@@ -140,7 +140,6 @@ struct ipheth_device {
struct usb_device *udev;
struct usb_interface *intf;
struct net_device *net;
struct sk_buff *tx_skb;
struct urb *tx_urb;
struct urb *rx_urb;
unsigned char *tx_buf;
@@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
case -EPROTO:
return;
case 0:
break;
@@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
dev_err(&dev->intf->dev, "%s: urb status: %d\n",
__func__, status);
dev_kfree_skb_irq(dev->tx_skb);
netif_wake_queue(dev->net);
}
@@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
if (skb->len > IPHETH_BUF_SIZE) {
WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
dev->net->stats.tx_dropped++;
dev_kfree_skb_irq(skb);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
@@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
__func__, retval);
dev->net->stats.tx_errors++;
dev_kfree_skb_irq(skb);
dev_kfree_skb_any(skb);
} else {
dev->tx_skb = skb;
dev->net->stats.tx_packets++;
dev->net->stats.tx_bytes += skb->len;
dev_consume_skb_any(skb);
netif_stop_queue(net);
}


@@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN,
VIRTIO_NET_F_GUEST_UFO
VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM
};
struct virtnet_stats {
@@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
if (!vi->guest_offloads)
return 0;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
return virtnet_set_guest_offloads(vi, offloads);
}
@@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
if (!vi->guest_offloads)
return 0;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
return virtnet_set_guest_offloads(vi, offloads);
}
@@ -1970,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
&& (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
return -EOPNOTSUPP;
}


@@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
};
int rc;
u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len;
struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL);
struct wmi_set_appie_cmd *cmd;
if (len < ie_len) {
rc = -EINVAL;
goto out;
}
cmd = kzalloc(len, GFP_KERNEL);
if (!cmd) {
rc = -ENOMEM;
goto out;
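The added comparison catches a 16-bit overflow: len is a u16, so sizeof(struct wmi_set_appie_cmd) + ie_len can wrap and become smaller than ie_len, which would otherwise lead to an undersized kzalloc() followed by an out-of-bounds copy of the IE data. A minimal userspace sketch of the check, with an invented header size:

#include <stdint.h>
#include <stdio.h>

#define CMD_HDR_SIZE 6  /* stands in for sizeof(struct wmi_set_appie_cmd) */

/* Returns 0 when the total length is sane, -1 on 16-bit wraparound. */
static int check_ie_len(uint16_t ie_len)
{
        uint16_t len = CMD_HDR_SIZE + ie_len;   /* truncated modulo 65536 */

        if (len < ie_len)
                return -1;                      /* wrapped around: reject */
        return 0;
}

int main(void)
{
        printf("ie_len=100   -> %d\n", check_ie_len(100));    /*  0: fine  */
        printf("ie_len=65534 -> %d\n", check_ie_len(65534));  /* -1: wraps */
        return 0;
}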


@@ -35,7 +35,6 @@
#include "wl12xx_80211.h"
#include "cmd.h"
#include "event.h"
#include "ps.h"
#include "tx.h"
#include "hw_ops.h"
@@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT);
ret = wl1271_ps_elp_wakeup(wl);
if (ret < 0)
return ret;
do {
if (time_after(jiffies, timeout_time)) {
wl1271_debug(DEBUG_CMD, "timeout waiting for event %d",
@@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
} while (!event);
out:
wl1271_ps_elp_sleep(wl);
kfree(events_vector);
return ret;
}


@@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
int i;
for (i = 0; i < PCIE_IATU_NUM; i++)
dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
}
static int ls1021_pcie_link_up(struct dw_pcie *pci)


@@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
{
struct qeth_ipa_cmd *cmd;
struct qeth_arp_query_info *qinfo;
struct qeth_snmp_cmd *snmp;
unsigned char *data;
void *snmp_data;
__u16 data_len;
QETH_CARD_TEXT(card, 3, "snpcmdcb");
@@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
cmd = (struct qeth_ipa_cmd *) sdata;
data = (unsigned char *)((char *)cmd - reply->offset);
qinfo = (struct qeth_arp_query_info *) reply->param;
snmp = &cmd->data.setadapterparms.data.snmp;
if (cmd->hdr.return_code) {
QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
@@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
return 0;
}
data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
if (cmd->data.setadapterparms.hdr.seq_no == 1)
data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
else
data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
if (cmd->data.setadapterparms.hdr.seq_no == 1) {
snmp_data = &cmd->data.setadapterparms.data.snmp;
data_len -= offsetof(struct qeth_ipa_cmd,
data.setadapterparms.data.snmp);
} else {
snmp_data = &cmd->data.setadapterparms.data.snmp.request;
data_len -= offsetof(struct qeth_ipa_cmd,
data.setadapterparms.data.snmp.request);
}
/* check if there is enough room in userspace */
if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
@@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
QETH_CARD_TEXT_(card, 4, "sseqn%i",
cmd->data.setadapterparms.hdr.seq_no);
/*copy entries to user buffer*/
if (cmd->data.setadapterparms.hdr.seq_no == 1) {
memcpy(qinfo->udata + qinfo->udata_offset,
(char *)snmp,
data_len + offsetof(struct qeth_snmp_cmd, data));
qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
} else {
memcpy(qinfo->udata + qinfo->udata_offset,
(char *)&snmp->request, data_len);
}
memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
qinfo->udata_offset += data_len;
/* check if all replies received ... */
QETH_CARD_TEXT_(card, 4, "srtot%i",
cmd->data.setadapterparms.hdr.used_total);
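The rework above drops the pointer arithmetic and computes the header size to trim with offsetof(), then copies from a single snmp_data pointer. A minimal userspace sketch of that offsetof() calculation, with an invented command layout standing in for struct qeth_ipa_cmd:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Invented nesting that mirrors an IPA command wrapping an SNMP payload. */
struct snmp_cmd { int request; char data[32]; };
struct ipa_cmd  { int hdr; struct { struct snmp_cmd snmp; } setadapterparms; };

int main(void)
{
        struct ipa_cmd cmd;
        char user_buf[sizeof(cmd)];
        /* Total reply length as reported by the device (whole command here). */
        size_t data_len = sizeof(cmd);
        void *snmp_data;

        memset(&cmd, 0x5a, sizeof(cmd));

        /* First fragment: keep the whole snmp_cmd and trim everything in
         * front of it, using offsetof() instead of pointer subtraction. */
        snmp_data = &cmd.setadapterparms.snmp;
        data_len -= offsetof(struct ipa_cmd, setadapterparms.snmp);

        memcpy(user_buf, snmp_data, data_len);
        printf("copied %zu bytes of SNMP payload\n", data_len);
        return 0;
}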


@@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
sinfo->tx_packets = psta->sta_stats.tx_pkts;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
}
/* for Ad-Hoc/AP mode */


@@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
struct vchiq_await_completion32 args32;
struct vchiq_completion_data32 completion32;
unsigned int *msgbufcount32;
unsigned int msgbufcount_native;
compat_uptr_t msgbuf32;
void *msgbuf;
void **msgbufptr;
@@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
sizeof(completion32)))
return -EFAULT;
args32.msgbufcount--;
if (get_user(msgbufcount_native, &args->msgbufcount))
return -EFAULT;
if (!msgbufcount_native)
args32.msgbufcount--;
msgbufcount32 =
&((struct vchiq_await_completion32 __user *)arg)->msgbufcount;


@@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Microsoft LifeCam-VX700 v2.0 */
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },


@@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
unsigned transfer_in_flight;
unsigned started;
if (dep->flags & DWC3_EP_STALL)
return 0;
if (dep->number > 1)
trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
else
@@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
else
dep->flags |= DWC3_EP_STALL;
} else {
if (!(dep->flags & DWC3_EP_STALL))
return 0;
ret = dwc3_send_clear_stall_ep_cmd(dep);
if (ret)


@@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
"USB Card Reader",
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
"Realtek",
"USB Card Reader",
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
"Realtek",
"USB Card Reader",
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
#endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */


@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o free-space-tree.o
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o


@@ -50,6 +50,7 @@
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
#include "tree-checker.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -544,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
return ret;
}
#define CORRUPT(reason, eb, root, slot) \
btrfs_crit(root->fs_info, \
"corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
btrfs_header_level(eb) == 0 ? "leaf" : "node", \
reason, btrfs_header_bytenr(eb), root->objectid, slot)
static noinline int check_leaf(struct btrfs_root *root,
struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
struct btrfs_key leaf_key;
u32 nritems = btrfs_header_nritems(leaf);
int slot;
/*
* Extent buffers from a relocation tree have a owner field that
* corresponds to the subvolume tree they are based on. So just from an
* extent buffer alone we can not find out what is the id of the
* corresponding subvolume tree, so we can not figure out if the extent
* buffer corresponds to the root of the relocation tree or not. So skip
* this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
struct btrfs_root *check_root;
key.objectid = btrfs_header_owner(leaf);
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
check_root = btrfs_get_fs_root(fs_info, &key, false);
/*
* The only reason we also check NULL here is that during
* open_ctree() some roots have not yet been set up.
*/
if (!IS_ERR_OR_NULL(check_root)) {
struct extent_buffer *eb;
eb = btrfs_root_node(check_root);
/* if leaf is the root, then it's fine */
if (leaf != eb) {
CORRUPT("non-root leaf's nritems is 0",
leaf, check_root, 0);
free_extent_buffer(eb);
return -EIO;
}
free_extent_buffer(eb);
}
return 0;
}
if (nritems == 0)
return 0;
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
BTRFS_LEAF_DATA_SIZE(fs_info)) {
CORRUPT("invalid item offset size pair", leaf, root, 0);
return -EIO;
}
/*
* Check to make sure each items keys are in the correct order and their
* offsets make sense. We only have to loop through nritems-1 because
* we check the current slot against the next slot, which verifies the
* next slot's offset+size makes sense and that the current's slot
* offset is correct.
*/
for (slot = 0; slot < nritems - 1; slot++) {
btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
btrfs_item_key_to_cpu(leaf, &key, slot + 1);
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
CORRUPT("bad key order", leaf, root, slot);
return -EIO;
}
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
* front.
*/
if (btrfs_item_offset_nr(leaf, slot) !=
btrfs_item_end_nr(leaf, slot + 1)) {
CORRUPT("slot offset bad", leaf, root, slot);
return -EIO;
}
/*
* Check to make sure that we don't point outside of the leaf,
* just in case all the items are consistent to each other, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
CORRUPT("slot end outside of leaf", leaf, root, slot);
return -EIO;
}
}
return 0;
}
static int check_node(struct btrfs_root *root, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
int slot;
u64 bytenr;
int ret = 0;
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
btrfs_crit(root->fs_info,
"corrupt node: block %llu root %llu nritems %lu",
node->start, root->objectid, nr);
return -EIO;
}
for (slot = 0; slot < nr - 1; slot++) {
bytenr = btrfs_node_blockptr(node, slot);
btrfs_node_key_to_cpu(node, &key, slot);
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
CORRUPT("invalid item slot", node, root, slot);
ret = -EIO;
goto out;
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
CORRUPT("bad key order", node, root, slot);
ret = -EIO;
goto out;
}
}
out:
return ret;
}
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -749,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && check_leaf(root, eb)) {
if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
if (found_level > 0 && check_node(root, eb))
if (found_level > 0 && btrfs_check_node(root, eb))
ret = -EIO;
if (!ret)
@@ -4009,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
/*
* Since btrfs_mark_buffer_dirty() can be called with the item pointer
* set but the item data not yet updated, we should only check item
* pointers here, not item data.
*/
if (btrfs_header_level(buf) == 0 &&
btrfs_check_leaf_relaxed(root, buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}


@@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
struct btrfs_block_group_item bg;
u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
"logical %llu len %llu found bg but no related chunk",
found_key.objectid, found_key.offset);
ret = -ENOENT;
} else if (em->start != found_key.objectid ||
em->len != found_key.offset) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
found_key.objectid, found_key.offset,
em->start, em->len);
ret = -EUCLEAN;
} else {
ret = 0;
read_extent_buffer(leaf, &bg,
btrfs_item_ptr_offset(leaf, slot),
sizeof(bg));
flags = btrfs_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
if (flags != (em->map_lookup->type &
BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
found_key.objectid,
found_key.offset, flags,
(BTRFS_BLOCK_GROUP_TYPE_MASK &
em->map_lookup->type));
ret = -EUCLEAN;
} else {
ret = 0;
}
}
free_extent_map(em);
goto out;
@@ -10092,6 +10118,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
return cache;
}
/*
* Iterate all chunks and verify that each of them has the corresponding block
* group
*/
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
{
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct btrfs_block_group_cache *bg;
u64 start = 0;
int ret = 0;
while (1) {
read_lock(&map_tree->map_tree.lock);
/*
* lookup_extent_mapping will return the first extent map
* intersecting the range, so setting @len to 1 is enough to
* get the first chunk.
*/
em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
read_unlock(&map_tree->map_tree.lock);
if (!em)
break;
bg = btrfs_lookup_block_group(fs_info, em->start);
if (!bg) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu doesn't have corresponding block group",
em->start, em->len);
ret = -EUCLEAN;
free_extent_map(em);
break;
}
if (bg->key.objectid != em->start ||
bg->key.offset != em->len ||
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
em->start, em->len,
em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
bg->key.objectid, bg->key.offset,
bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
ret = -EUCLEAN;
free_extent_map(em);
btrfs_put_block_group(bg);
break;
}
start = em->start + em->len;
free_extent_map(em);
btrfs_put_block_group(bg);
}
return ret;
}
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
@@ -10264,7 +10346,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
init_global_block_rsv(info);
ret = 0;
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;


@@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans);
trans = NULL;
continue;
}


@@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
vol = memdup_user((void __user *)arg, sizeof(*vol));
if (IS_ERR(vol))
return PTR_ERR(vol);
vol->name[BTRFS_PATH_NAME_MAX] = '\0';
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:


@@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
return ret;
}
btrfs_trans_release_metadata(trans, fs_info);
trans->block_rsv = NULL;
/* make a pass through all the delayed refs we have so far
* any runnings procs may add more while we are here
*/
@@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
return ret;
}
btrfs_trans_release_metadata(trans, fs_info);
trans->block_rsv = NULL;
cur_trans = trans->transaction;
/*

fs/btrfs/tree-checker.c (new file)

@@ -0,0 +1,649 @@
/*
* Copyright (C) Qu Wenruo 2017. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program.
*/
/*
* The module is used to catch unexpected/corrupted tree block data.
* Such behavior can be caused either by a fuzzed image or bugs.
*
* The objective is to do leaf/node validation checks when tree block is read
* from disk, and check *every* possible member, so other code won't
* need to check them again.
*
* Due to the potential and unwanted damage, every checker needs to be
* carefully reviewed so that it does not prevent the mount of valid images.
*/
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
#include "hash.h"
#include "volumes.h"
#define CORRUPT(reason, eb, root, slot) \
btrfs_crit(root->fs_info, \
"corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
btrfs_header_level(eb) == 0 ? "leaf" : "node", \
reason, btrfs_header_bytenr(eb), root->objectid, slot)
/*
* Error message should follow the following format:
* corrupt <type>: <identifier>, <reason>[, <bad_value>]
*
* @type: leaf or node
* @identifier: the necessary info to locate the leaf/node.
* It's recommended to decode key.objectid/offset if it's
* meaningful.
* @reason: describe the error
* @bad_value: optional, it's recommended to output bad value and its
* expected value (range).
*
* Since comma is used to separate the components, only space is allowed
* inside each component.
*/
/*
* Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
* Allows callers to customize the output.
*/
__printf(4, 5)
static void generic_err(const struct btrfs_root *root,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
va_end(args);
}
static int check_extent_data_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_file_extent_item *fi;
u32 sectorsize = root->fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
if (!IS_ALIGNED(key->offset, sectorsize)) {
CORRUPT("unaligned key offset for file extent",
leaf, root, slot);
return -EUCLEAN;
}
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
CORRUPT("invalid file extent type", leaf, root, slot);
return -EUCLEAN;
}
/*
* Support for new compression/encryption must introduce incompat flag,
* and must be caught in open_ctree().
*/
if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
CORRUPT("invalid file extent compression", leaf, root, slot);
return -EUCLEAN;
}
if (btrfs_file_extent_encryption(leaf, fi)) {
CORRUPT("invalid file extent encryption", leaf, root, slot);
return -EUCLEAN;
}
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
/* Inline extent must have 0 as key offset */
if (key->offset) {
CORRUPT("inline extent has non-zero key offset",
leaf, root, slot);
return -EUCLEAN;
}
/* Compressed inline extent has no on-disk size, skip it */
if (btrfs_file_extent_compression(leaf, fi) !=
BTRFS_COMPRESS_NONE)
return 0;
/* Uncompressed inline extent size must match item size */
if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi)) {
CORRUPT("plaintext inline extent has invalid size",
leaf, root, slot);
return -EUCLEAN;
}
return 0;
}
/* Regular or preallocated extent has fixed item size */
if (item_size != sizeof(*fi)) {
CORRUPT(
"regular or preallocated extent data item size is invalid",
leaf, root, slot);
return -EUCLEAN;
}
if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
!IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
!IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
!IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
!IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
CORRUPT(
"regular or preallocated extent data item has unaligned value",
leaf, root, slot);
return -EUCLEAN;
}
return 0;
}
static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
u32 sectorsize = root->fs_info->sectorsize;
u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
CORRUPT("invalid objectid for csum item", leaf, root, slot);
return -EUCLEAN;
}
if (!IS_ALIGNED(key->offset, sectorsize)) {
CORRUPT("unaligned key offset for csum item", leaf, root, slot);
return -EUCLEAN;
}
if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
CORRUPT("unaligned csum item size", leaf, root, slot);
return -EUCLEAN;
}
return 0;
}
/*
* Customized report for dir_item; the only important new info is key->objectid,
* which represents the inode number
*/
__printf(4, 5)
static void dir_item_err(const struct btrfs_root *root,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct btrfs_key key;
struct va_format vaf;
va_list args;
btrfs_item_key_to_cpu(eb, &key, slot);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
va_end(args);
}
static int check_dir_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_dir_item *di;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u32 cur = 0;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
while (cur < item_size) {
u32 name_len;
u32 data_len;
u32 max_name_len;
u32 total_size;
u32 name_hash;
u8 dir_type;
/* header itself should not cross item boundary */
if (cur + sizeof(*di) > item_size) {
dir_item_err(root, leaf, slot,
"dir item header crosses item boundary, have %zu boundary %u",
cur + sizeof(*di), item_size);
return -EUCLEAN;
}
/* dir type check */
dir_type = btrfs_dir_type(leaf, di);
if (dir_type >= BTRFS_FT_MAX) {
dir_item_err(root, leaf, slot,
"invalid dir item type, have %u expect [0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
}
if (key->type == BTRFS_XATTR_ITEM_KEY &&
dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"invalid dir item type for XATTR key, have %u expect %u",
dir_type, BTRFS_FT_XATTR);
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR &&
key->type != BTRFS_XATTR_ITEM_KEY) {
dir_item_err(root, leaf, slot,
"xattr dir type found for non-XATTR key");
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR)
max_name_len = XATTR_NAME_MAX;
else
max_name_len = BTRFS_NAME_LEN;
/* Name/data length check */
name_len = btrfs_dir_name_len(leaf, di);
data_len = btrfs_dir_data_len(leaf, di);
if (name_len > max_name_len) {
dir_item_err(root, leaf, slot,
"dir item name len too long, have %u max %u",
name_len, max_name_len);
return -EUCLEAN;
}
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
dir_item_err(root, leaf, slot,
"dir item name and data len too long, have %u max %u",
name_len + data_len,
BTRFS_MAX_XATTR_SIZE(root->fs_info));
return -EUCLEAN;
}
if (data_len && dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"dir item with invalid data len, have %u expect 0",
data_len);
return -EUCLEAN;
}
total_size = sizeof(*di) + name_len + data_len;
/* header and name/data should not cross item boundary */
if (cur + total_size > item_size) {
dir_item_err(root, leaf, slot,
"dir item data crosses item boundary, have %u boundary %u",
cur + total_size, item_size);
return -EUCLEAN;
}
/*
* Special check for XATTR/DIR_ITEM, as key->offset is name
* hash, should match its name
*/
if (key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_XATTR_ITEM_KEY) {
char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
read_extent_buffer(leaf, namebuf,
(unsigned long)(di + 1), name_len);
name_hash = btrfs_name_hash(namebuf, name_len);
if (key->offset != name_hash) {
dir_item_err(root, leaf, slot,
"name hash mismatch with key, have 0x%016x expect 0x%016llx",
name_hash, key->offset);
return -EUCLEAN;
}
}
cur += total_size;
di = (struct btrfs_dir_item *)((void *)di + total_size);
}
return 0;
}
__printf(4, 5)
__cold
static void block_group_err(const struct btrfs_fs_info *fs_info,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct btrfs_key key;
struct va_format vaf;
va_list args;
btrfs_item_key_to_cpu(eb, &key, slot);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
key.objectid, key.offset, &vaf);
va_end(args);
}
static int check_block_group_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_block_group_item bgi;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u64 flags;
u64 type;
/*
* Here we don't really care about alignment since extent allocator can
* handle it. We care more about the size, as if one block group is
* larger than maximum size, it must be some obvious corruption.
*/
if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
block_group_err(fs_info, leaf, slot,
"invalid block group size, have %llu expect (0, %llu]",
key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
return -EUCLEAN;
}
if (item_size != sizeof(bgi)) {
block_group_err(fs_info, leaf, slot,
"invalid item size, have %u expect %zu",
item_size, sizeof(bgi));
return -EUCLEAN;
}
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
if (btrfs_block_group_chunk_objectid(&bgi) !=
BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
block_group_err(fs_info, leaf, slot,
"invalid block group chunk objectid, have %llu expect %llu",
btrfs_block_group_chunk_objectid(&bgi),
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
return -EUCLEAN;
}
if (btrfs_block_group_used(&bgi) > key->offset) {
block_group_err(fs_info, leaf, slot,
"invalid block group used, have %llu expect [0, %llu)",
btrfs_block_group_used(&bgi), key->offset);
return -EUCLEAN;
}
flags = btrfs_block_group_flags(&bgi);
if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
block_group_err(fs_info, leaf, slot,
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
return -EUCLEAN;
}
type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
if (type != BTRFS_BLOCK_GROUP_DATA &&
type != BTRFS_BLOCK_GROUP_METADATA &&
type != BTRFS_BLOCK_GROUP_SYSTEM &&
type != (BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA)) {
block_group_err(fs_info, leaf, slot,
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
type, hweight64(type),
BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
return -EUCLEAN;
}
return 0;
}
/*
* Common point to switch the item-specific validation.
*/
static int check_leaf_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
int ret = 0;
switch (key->type) {
case BTRFS_EXTENT_DATA_KEY:
ret = check_extent_data_item(root, leaf, key, slot);
break;
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(root, leaf, key, slot);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(root->fs_info, leaf, key, slot);
break;
}
return ret;
}
static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
bool check_item_data)
{
struct btrfs_fs_info *fs_info = root->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
struct btrfs_key prev_key = {0, 0, 0};
struct btrfs_key key;
u32 nritems = btrfs_header_nritems(leaf);
int slot;
if (btrfs_header_level(leaf) != 0) {
generic_err(root, leaf, 0,
"invalid level for leaf, have %d expect 0",
btrfs_header_level(leaf));
return -EUCLEAN;
}
/*
* Extent buffers from a relocation tree have an owner field that
* corresponds to the subvolume tree they are based on. So just from an
* extent buffer alone we can not find out what is the id of the
* corresponding subvolume tree, so we can not figure out if the extent
* buffer corresponds to the root of the relocation tree or not. So
* skip this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
u64 owner = btrfs_header_owner(leaf);
struct btrfs_root *check_root;
/* These trees must never be empty */
if (owner == BTRFS_ROOT_TREE_OBJECTID ||
owner == BTRFS_CHUNK_TREE_OBJECTID ||
owner == BTRFS_EXTENT_TREE_OBJECTID ||
owner == BTRFS_DEV_TREE_OBJECTID ||
owner == BTRFS_FS_TREE_OBJECTID ||
owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
generic_err(root, leaf, 0,
"invalid root, root %llu must never be empty",
owner);
return -EUCLEAN;
}
key.objectid = owner;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
check_root = btrfs_get_fs_root(fs_info, &key, false);
/*
* The only reason we also check NULL here is that during
* open_ctree() some roots have not yet been set up.
*/
if (!IS_ERR_OR_NULL(check_root)) {
struct extent_buffer *eb;
eb = btrfs_root_node(check_root);
/* if leaf is the root, then it's fine */
if (leaf != eb) {
CORRUPT("non-root leaf's nritems is 0",
leaf, check_root, 0);
free_extent_buffer(eb);
return -EUCLEAN;
}
free_extent_buffer(eb);
}
return 0;
}
if (nritems == 0)
return 0;
/*
* Check the following things to make sure this is a good leaf, and
* leaf users won't need to bother with similar sanity checks:
*
* 1) key ordering
* 2) item offset and size
* No overlap, no hole, all inside the leaf.
* 3) item content
* If possible, do comprehensive sanity check.
* NOTE: All checks must only rely on the item data itself.
*/
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
CORRUPT("bad key order", leaf, root, slot);
return -EUCLEAN;
}
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
* front.
*/
if (slot == 0)
item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
else
item_end_expected = btrfs_item_offset_nr(leaf,
slot - 1);
if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
CORRUPT("slot offset bad", leaf, root, slot);
return -EUCLEAN;
}
/*
* Check to make sure that we don't point outside of the leaf,
* just in case all the items are consistent to each other, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
CORRUPT("slot end outside of leaf", leaf, root, slot);
return -EUCLEAN;
}
/* Also check if the item pointer overlaps with btrfs item. */
if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
btrfs_item_ptr_offset(leaf, slot)) {
CORRUPT("slot overlap with its data", leaf, root, slot);
return -EUCLEAN;
}
if (check_item_data) {
/*
* Check if the item size and content meet other
* criteria
*/
ret = check_leaf_item(root, leaf, &key, slot);
if (ret < 0)
return ret;
}
prev_key.objectid = key.objectid;
prev_key.type = key.type;
prev_key.offset = key.offset;
}
return 0;
}
int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
{
return check_leaf(root, leaf, true);
}
int btrfs_check_leaf_relaxed(struct btrfs_root *root,
struct extent_buffer *leaf)
{
return check_leaf(root, leaf, false);
}
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
int slot;
int level = btrfs_header_level(node);
u64 bytenr;
int ret = 0;
if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
generic_err(root, node, 0,
"invalid level for node, have %d expect [1, %d]",
level, BTRFS_MAX_LEVEL - 1);
return -EUCLEAN;
}
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
btrfs_crit(root->fs_info,
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
root->objectid, node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
return -EUCLEAN;
}
for (slot = 0; slot < nr - 1; slot++) {
bytenr = btrfs_node_blockptr(node, slot);
btrfs_node_key_to_cpu(node, &key, slot);
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
generic_err(root, node, slot,
"invalid NULL node pointer");
ret = -EUCLEAN;
goto out;
}
if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
generic_err(root, node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, root->fs_info->sectorsize);
ret = -EUCLEAN;
goto out;
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
generic_err(root, node, slot,
"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
next_key.offset);
ret = -EUCLEAN;
goto out;
}
}
out:
return ret;
}

fs/btrfs/tree-checker.h (new file, 38 lines)

@@ -0,0 +1,38 @@
/*
* Copyright (C) Qu Wenruo 2017. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program.
*/
#ifndef __BTRFS_TREE_CHECKER__
#define __BTRFS_TREE_CHECKER__
#include "ctree.h"
#include "extent_io.h"
/*
* Comprehensive leaf checker.
* Will check not only the item pointers, but also every possible member
* in item data.
*/
int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
/*
* Less strict leaf checker.
* Will only check item pointers, not reading item data.
*/
int btrfs_check_leaf_relaxed(struct btrfs_root *root,
struct extent_buffer *leaf);
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
#endif
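The header exposes two leaf checkers of different strictness plus a node checker. Purely as an illustration (this wrapper is not part of the patch), a caller validating a freshly read extent buffer could dispatch between them roughly like this:

/* Editor's sketch: pick the appropriate checker for a tree block based on
 * its level and on whether the expensive item-data validation is wanted. */
static int validate_tree_block(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       bool check_item_data)
{
	if (btrfs_header_level(eb) > 0)
		return btrfs_check_node(root, eb);

	return check_item_data ? btrfs_check_leaf_full(root, eb) :
				 btrfs_check_leaf_relaxed(root, eb);
}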


@@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
max_chunk_size = 10 * max_stripe_size;
max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
u16 num_stripes;
u16 sub_stripes;
u64 type;
u64 features;
bool mixed = false;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
return -EIO;
}
if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
(type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
btrfs_err(fs_info,
"system chunk with data or metadata type: 0x%llx", type);
return -EIO;
}
features = btrfs_super_incompat_flags(fs_info->super_copy);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
mixed = true;
if (!mixed) {
if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
(type & BTRFS_BLOCK_GROUP_DATA)) {
btrfs_err(fs_info,
"mixed chunk type in non-mixed mode: 0x%llx", type);
return -EIO;
}
}
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||


@@ -24,6 +24,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K


@@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
static int add_authorizer_challenge(struct ceph_connection *con,
void *challenge_buf, int challenge_buf_len)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
challenge_buf, challenge_buf_len);
}
static int verify_authorizer_reply(struct ceph_connection *con)
{
@@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = {
.put = con_put,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
.add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,


@@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
*/
dio->iocb->ki_pos += transferred;
if (dio->op == REQ_OP_WRITE)
ret = generic_write_sync(dio->iocb, transferred);
if (ret > 0 && dio->op == REQ_OP_WRITE)
ret = generic_write_sync(dio->iocb, ret);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}


@@ -612,9 +612,9 @@ skip_replace:
}
cleanup:
brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
brelse(bh);
up_write(&EXT2_I(inode)->xattr_sem);
return error;


@@ -85,8 +85,10 @@ repeat:
fio.page = page;
if (f2fs_submit_page_bio(&fio)) {
f2fs_put_page(page, 1);
goto repeat;
memset(page_address(page), 0, PAGE_SIZE);
f2fs_stop_checkpoint(sbi, false);
f2fs_bug_on(sbi, 1);
return page;
}
lock_page(page);
@@ -117,7 +119,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, false);
}
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -137,8 +140,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
return false;
break;
case META_POR:
case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi)))
blkaddr < MAIN_BLKADDR(sbi))) {
if (type == DATA_GENERIC) {
f2fs_msg(sbi->sb, KERN_WARNING,
"access invalid blkaddr:%u", blkaddr);
WARN_ON(1);
}
return false;
}
break;
case META_GENERIC:
if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -173,7 +188,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
if (!is_valid_blkaddr(sbi, blkno, type))
if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -774,6 +789,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
&cp_page_1, version);
if (err)
return NULL;
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid cp_pack_total_block_count:%u",
le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
@@ -837,15 +860,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
goto free_fail_no_cp;
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
goto free_fail_no_cp;
if (cp_blks <= 1)
goto done;


@@ -440,7 +440,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
verify_block_addr(fio, fio->new_blkaddr);
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFAULT;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -485,7 +488,7 @@ next:
spin_unlock(&io->io_lock);
}
if (fio->old_blkaddr != NEW_ADDR)
if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
@@ -1045,7 +1048,13 @@ next_dnode:
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
err = -EFAULT;
goto sync_out;
}
if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1495,6 +1504,10 @@ got_it:
SetPageUptodate(page);
goto confused;
}
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC))
goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1680,15 +1693,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
return should_update_inplace(inode, fio);
}
static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
{
if (fio->old_blkaddr == NEW_ADDR)
return false;
if (fio->old_blkaddr == NULL_ADDR)
return false;
return true;
}
int do_write_data_page(struct f2fs_io_info *fio)
{
struct page *page = fio->page;
@@ -1703,11 +1707,13 @@ int do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (valid_ipu_blkaddr(fio)) {
ipu_force = true;
fio->need_lock = LOCK_DONE;
goto got_it;
}
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC))
return -EFAULT;
ipu_force = true;
fio->need_lock = LOCK_DONE;
goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
@@ -1726,11 +1732,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC)) {
err = -EFAULT;
goto out_writepage;
}
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
goto out_writepage;


@@ -193,7 +193,7 @@ struct cp_control {
};
/*
* For CP/NAT/SIT/SSA readahead
* indicate meta/data type
*/
enum {
META_CP,
@@ -201,6 +201,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
DATA_GENERIC,
META_GENERIC,
};
/* for the list of ino */
@@ -2599,6 +2601,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
(!is_read_io(fio->op) || fio->is_meta))
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
f2fs_msg(sbi->sb, KERN_ERR,
"invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
f2fs_bug_on(sbi, 1);
}
}
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return false;
return true;
}
static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
if (!__is_valid_data_blkaddr(blkaddr))
return false;
verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
return true;
}
/*
* file.c
*/
@@ -2817,7 +2852,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
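The split introduced above gives callers a cheap placeholder test (__is_valid_data_blkaddr) and a stricter on-disk range check (f2fs_is_valid_blkaddr). A minimal sketch of the intended calling pattern, mirroring the do_write_data_page() and truncate_data_blocks_range() hunks in this series (the helper name is invented for illustration):

/* Editor's sketch, not part of the patch: validate a data block address
 * before using it. Placeholder addresses are fine; a "real" address that
 * falls outside the main area indicates a corrupted image. */
static int example_check_data_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	if (!__is_valid_data_blkaddr(blkaddr))
		return 0;	/* NEW_ADDR / NULL_ADDR: nothing on disk yet */

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return -EFAULT;	/* out of range: run fsck */

	return 0;
}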


@@ -345,13 +345,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
int whence)
static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
(blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -414,7 +414,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
if (__found_offset(blkaddr, dirty, pgofs, whence)) {
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
blkaddr, DATA_GENERIC)) {
f2fs_put_dnode(&dn);
goto fail;
}
if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -506,6 +514,11 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
set_data_blkaddr(dn);
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
continue;
invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);


@@ -68,11 +68,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
static bool __written_first_block(struct f2fs_inode *ri)
static bool __written_first_block(struct f2fs_sb_info *sbi,
struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
if (addr != NEW_ADDR && addr != NULL_ADDR)
if (is_valid_data_blkaddr(sbi, addr))
return true;
return false;
}
@@ -185,6 +186,72 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned long long iblocks;
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
"run fsck to fix.",
__func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode footer i_ino=%lx, ino,nid: "
"[%u, %u] run fsck to fix.",
__func__, inode->i_ino,
ino_of_node(node_page), nid_of_node(node_page));
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi->sb)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) is with extra_attr, "
"but extra_attr feature is off",
__func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
"max: %zu",
__func__, inode->i_ino, fi->i_extra_isize,
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
(!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) extent info [%u, %u, %u] "
"is incorrect, run fsck to fix",
__func__, inode->i_ino,
ei->blk, ei->fofs, ei->len);
return false;
}
}
return true;
}
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -234,21 +301,9 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
} else {
/*
* Previous inline data or directory always reserved 200 bytes
* in inode layout, even if inline_xattr is disabled. In order
* to keep inline_dentry's structure for backward compatibility,
* we get the space back only from inline_data.
*/
fi->i_inline_xattr_size = 0;
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
return -EINVAL;
}
/* check data exist */
@@ -258,7 +313,7 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
if (__written_first_block(ri))
if (__written_first_block(sbi, ri))
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
if (!need_inode_block_update(sbi, inode->i_ino))


@@ -379,8 +379,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
nat_get_blkaddr(e) != NULL_ADDR &&
f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -391,7 +390,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -1411,6 +1410,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
!f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
up_read(&sbi->node_write);
goto redirty_out;
}
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;


@@ -255,7 +255,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = get_tmp_page(sbi, blkaddr);
@@ -509,7 +509,7 @@ retry_dn:
}
/* dest is valid block, try to recover from src to dest */
if (is_valid_blkaddr(sbi, dest, META_POR)) {
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
@@ -570,7 +570,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
ra_meta_pages_cond(sbi, blkaddr);


@@ -1892,7 +1892,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -2953,7 +2953,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct page *cpage;
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3671,6 +3671,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int old_valid_blocks;
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong journal entry on segno %u",
start);
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EINVAL;
break;
}
se = &sit_i->sentries[start];
sit = sit_in_journal(journal, i);


@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -645,13 +645,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
if (PAGE_TYPE_OF_BIO(fio->type) == META &&
(!is_read_io(fio->op) || fio->is_meta))
BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
blk_addr >= MAIN_BLKADDR(sbi));
if (__is_meta_io(fio))
verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
blk_addr >= MAX_BLKADDR(sbi));
verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*


@@ -2152,6 +2152,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
block_t segment_count, segs_per_sec, secs_per_zone;
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
struct super_block *sb = sbi->sb;
@@ -2208,6 +2210,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
segment_count = le32_to_cpu(raw_super->segment_count);
segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
total_sections = le32_to_cpu(raw_super->section_count);
/* blocks_per_seg should be 512, given the above check */
blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
if (segment_count > F2FS_MAX_SEGMENT ||
segment_count < F2FS_MIN_SEGMENTS) {
f2fs_msg(sb, KERN_INFO,
"Invalid segment count (%u)",
segment_count);
return 1;
}
if (total_sections > segment_count ||
total_sections < F2FS_MIN_SEGMENTS ||
segs_per_sec > segment_count || !segs_per_sec) {
f2fs_msg(sb, KERN_INFO,
"Invalid segment/section count (%u, %u x %u)",
segment_count, total_sections, segs_per_sec);
return 1;
}
if ((segment_count / segs_per_sec) < total_sections) {
f2fs_msg(sb, KERN_INFO,
"Small segment_count (%u < %u * %u)",
segment_count, segs_per_sec, total_sections);
return 1;
}
if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
f2fs_msg(sb, KERN_INFO,
"Wrong segment_count / block_count (%u > %u)",
segment_count, le32_to_cpu(raw_super->block_count));
return 1;
}
if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_msg(sb, KERN_INFO,
"Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
return 1;
}
if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
f2fs_msg(sb, KERN_INFO,
"Corrupted extension count (%u > %u)",
le32_to_cpu(raw_super->extension_count),
F2FS_MAX_EXTENSION);
return 1;
}
if (le32_to_cpu(raw_super->cp_payload) >
(blocks_per_seg - F2FS_CP_PACKS)) {
f2fs_msg(sb, KERN_INFO,
"Insane cp_payload (%u > %u)",
le32_to_cpu(raw_super->cp_payload),
blocks_per_seg - F2FS_CP_PACKS);
return 1;
}
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
@@ -2220,13 +2284,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
f2fs_msg(sb, KERN_INFO,
"Invalid segment count (%u)",
le32_to_cpu(raw_super->segment_count));
return 1;
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
return 1;
@@ -2244,6 +2301,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
@@ -2268,6 +2328,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
user_block_count = le64_to_cpu(ckpt->user_block_count);
segment_count_main = le32_to_cpu(raw_super->segment_count_main);
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong user_block_count: %u", user_block_count);
return 1;
}
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2284,7 +2354,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
@@ -2294,6 +2363,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
cp_pack_start_sum = __start_sum_addr(sbi);
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
NR_CURSEG_TYPE) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong cp_pack_start_sum: %u",
cp_pack_start_sum);
return 1;
}
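As a worked example of the bound being enforced (assuming the common 512 blocks per segment, cp_payload = 0, and NR_CURSEG_TYPE = 6): cp_pack_start_sum must satisfy cp_payload + 1 <= cp_pack_start_sum <= blocks_per_seg - 1 - NR_CURSEG_TYPE, i.e. it must lie in [1, 505], after the checkpoint header and payload but early enough that the six current-segment summary blocks and the trailing checkpoint block still fit inside the pack.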
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;


@@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
ASSERT(retval == 0);
if (retval)
return retval;
/*
* Since we have removed the old attr, clear ATTR_REPLACE so
* that the leaf format add routine won't trip over the attr
* not being around.
*/
args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||


@@ -113,6 +113,7 @@ struct bpf_insn_aux_data {
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
bool seen; /* this insn was processed by the verifier */
};


@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
/* ensure that an existing authorizer is up to date */
int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth);
int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
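Taken together, the new op and wrapper let the messenger answer a cephx v2 server challenge before the connection is accepted. The following is only an editor's sketch of the delegation the generic wrapper presumably performs (the in-tree body may also serialize on ac->mutex and differ in its fallback return value):

/* Editor's sketch of the generic wrapper's likely shape. */
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
				       struct ceph_authorizer *a,
				       void *challenge_buf,
				       int challenge_buf_len)
{
	if (!ac->ops || !ac->ops->add_authorizer_challenge)
		return 0;	/* auth method without cephx v2 support */

	return ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
						 challenge_buf_len);
}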


@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit*
DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal
DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
@@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_SERVER_JEWEL | \
CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_CRUSH_TUNABLES5 | \
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
CEPH_FEATURE_CEPHX_V2)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \


@@ -31,6 +31,9 @@ struct ceph_connection_operations {
struct ceph_auth_handshake *(*get_authorizer) (
struct ceph_connection *con,
int *proto, int force_new);
int (*add_authorizer_challenge)(struct ceph_connection *con,
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply) (struct ceph_connection *con);
int (*invalidate_authorizer)(struct ceph_connection *con);
@@ -203,9 +206,8 @@ struct ceph_connection {
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
void *auth_reply_buf; /* where to put the authorizer reply */
int auth_reply_buf_len;
struct mutex mutex;


@@ -91,7 +91,7 @@ struct ceph_entity_inst {
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */
/*
* connection negotiation


@@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
extern int jump_label_text_reserved(void *start, void *end);
extern void static_key_slow_inc(struct static_key *key);
extern void static_key_slow_dec(struct static_key *key);
extern void static_key_slow_inc_cpuslocked(struct static_key *key);
extern void static_key_slow_dec_cpuslocked(struct static_key *key);
extern void jump_label_apply_nops(struct module *mod);
extern int static_key_count(struct static_key *key);
extern void static_key_enable(struct static_key *key);
@@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key)
atomic_dec(&key->enabled);
}
#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key)
#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key)
static inline int jump_label_text_reserved(void *start, void *end)
{
return 0;
@@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void);
#define static_branch_inc(x) static_key_slow_inc(&(x)->key)
#define static_branch_dec(x) static_key_slow_dec(&(x)->key)
#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key)
#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key)
/*
* Normal usage; boolean enable/disable.


@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
#define PTRACE_MODE_READ 0x01
#define PTRACE_MODE_ATTACH 0x02
#define PTRACE_MODE_NOAUDIT 0x04
#define PTRACE_MODE_FSCREDS 0x08
#define PTRACE_MODE_REALCREDS 0x10
#define PTRACE_MODE_FSCREDS 0x08
#define PTRACE_MODE_REALCREDS 0x10
/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)


@@ -1492,6 +1492,8 @@ static inline bool is_percpu_thread(void)
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1523,6 +1525,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{

include/linux/sched/smt.h (new file, 20 lines)

@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_SMT_H
#define _LINUX_SCHED_SMT_H
#include <linux/static_key.h>
#ifdef CONFIG_SCHED_SMT
extern struct static_key_false sched_smt_present;
static __always_inline bool sched_smt_active(void)
{
return static_branch_likely(&sched_smt_present);
}
#else
static inline bool sched_smt_active(void) { return false; }
#endif
void arch_smt_update(void);
#endif
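sched_smt_active() gives architecture code a cheap, static-key-backed answer to whether SMT siblings are currently schedulable, and arch_smt_update() (declared here, defined __weak in kernel/cpu.c below) is the hook through which hotplug notifies the architecture. Purely as an illustration of the intended shape, an architecture override might look like this (the messages are placeholders, not from the patch):

/* Editor's illustration only: an arch-specific override of the weak
 * arch_smt_update() hook reacting to SMT coming or going. */
void arch_smt_update(void)
{
	if (sched_smt_active())
		pr_info("SMT siblings active, cross-thread mitigations stay enabled\n");
	else
		pr_info("SMT inactive, cross-thread mitigations can be relaxed\n");
}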


@@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
}
}
static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
{
skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
}
static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
{
return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
}
static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
{
return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
}
/* Release a reference on a zerocopy structure */
static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
{
@@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
if (uarg->callback == sock_zerocopy_callback) {
uarg->zerocopy = uarg->zerocopy && zerocopy;
sock_zerocopy_put(uarg);
} else {
} else if (!skb_zcopy_is_nouarg(skb)) {
uarg->callback(uarg, zerocopy);
}
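The nouarg variant stores an opaque cookie in destructor_arg and marks it by setting the pointer's low bit, which is why skb_zcopy_clear() above must now skip the callback for such skbs. A minimal illustration of the round trip (function names are invented for the example; the cookie must be at least 2-byte aligned so the tag bit is free):

/* Editor's example, not from the patch: tag an skb with a caller cookie
 * and read it back later, e.g. from the skb destructor. */
static void example_tag_skb(struct sk_buff *skb, void *cookie)
{
	skb_zcopy_set_nouarg(skb, cookie);	/* also sets SKBTX_ZEROCOPY_FRAG */
}

static void *example_cookie_of(struct sk_buff *skb)
{
	return skb_zcopy_is_nouarg(skb) ? skb_zcopy_get_nouarg(skb) : NULL;
}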


@@ -89,6 +89,8 @@ struct tls_context {
void *priv_ctx;
u8 tx_conf:2;
u16 prepend_size;
u16 tag_size;
u16 overhead_size;
@@ -104,7 +106,6 @@ struct tls_context {
u16 pending_open_record_frags;
int (*push_pending_record)(struct sock *sk, int flags);
void (*free_resources)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_proto_close)(struct sock *sk, long timeout);
@@ -129,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
void tls_sw_free_tx_resources(struct sock *sk);
void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
void tls_icsk_clean_acked(struct sock *sk);


@@ -734,6 +734,7 @@ struct btrfs_balance_item {
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
#define BTRFS_FILE_EXTENT_TYPES 2
struct btrfs_file_extent_item {
/*


@@ -203,6 +203,7 @@ struct prctl_mm_map {
#define PR_SET_SPECULATION_CTRL 53
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)


@@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
static int check_stack_write(struct bpf_verifier_state *state, int off,
int size, int value_regno)
static int check_stack_write(struct bpf_verifier_env *env,
struct bpf_verifier_state *state, int off,
int size, int value_regno, int insn_idx)
{
int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
state->spilled_regs[spi] = state->regs[value_regno];
state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
for (i = 0; i < BPF_REG_SIZE; i++)
for (i = 0; i < BPF_REG_SIZE; i++) {
if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
!env->allow_ptr_leaks) {
int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
int soff = (-spi - 1) * BPF_REG_SIZE;
/* detected reuse of integer stack slot with a pointer
* which means either llvm is reusing stack slot or
* an attacker is trying to exploit CVE-2018-3639
* (speculative store bypass)
* Have to sanitize that slot with preemptive
* store of zero.
*/
if (*poff && *poff != soff) {
/* disallow programs where single insn stores
* into two different stack slots, since verifier
* cannot sanitize them
*/
verbose("insn %d cannot access two stack slots fp%d and fp%d",
insn_idx, *poff, soff);
return -EINVAL;
}
*poff = soff;
}
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
}
} else {
/* regular write of data into stack */
state->spilled_regs[spi] = (struct bpf_reg_state) {};
@@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
verbose("attempt to corrupt spilled pointer on stack\n");
return -EACCES;
}
err = check_stack_write(state, off, size, value_regno);
err = check_stack_write(env, state, off, size,
value_regno, insn_idx);
} else {
err = check_stack_read(state, off, size, value_regno);
}
@@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
else
continue;
if (type == BPF_WRITE &&
env->insn_aux_data[i + delta].sanitize_stack_off) {
struct bpf_insn patch[] = {
/* Sanitize suspicious stack slot with zero.
* There are no memory dependencies for this store,
* since it's only using frame pointer and immediate
* constant of zero
*/
BPF_ST_MEM(BPF_DW, BPF_REG_FP,
env->insn_aux_data[i + delta].sanitize_stack_off,
0),
/* the original STX instruction will immediately
* overwrite the same stack slot with appropriate value
*/
*insn,
};
cnt = ARRAY_SIZE(patch);
new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
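In effect, the verifier rewrites a suspicious spill so that a constant zero store lands in the slot immediately before the original store, killing any stale value a speculative load could observe. A sketch of the transformation at the BPF instruction level (the offset is hypothetical):

/* before patching:
 *	*(u64 *)(r10 - 8) = r1		// pointer spilled over an old scalar
 *
 * after patching:
 *	*(u64 *)(r10 - 8) = 0		// inserted BPF_ST_MEM(BPF_DW, FP, -8, 0)
 *	*(u64 *)(r10 - 8) = r1		// original store, unchanged
 */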


@@ -10,6 +10,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
@@ -347,6 +348,12 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
/*
* Architectures that need SMT-specific errata handling during SMT hotplug
* should override this.
*/
void __weak arch_smt_update(void) { }
#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
EXPORT_SYMBOL_GPL(cpu_smt_control);
@@ -998,6 +1005,7 @@ out:
* concurrent CPU hotplug via cpu_add_remove_lock.
*/
lockup_detector_cleanup();
arch_smt_update();
return ret;
}
@@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpus_write_unlock();
arch_smt_update();
return ret;
}
@@ -2078,8 +2087,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
*/
cpuhp_offline_cpu_device(cpu);
}
if (!ret)
if (!ret) {
cpu_smt_control = ctrlval;
arch_smt_update();
}
cpu_maps_update_done();
return ret;
}
@@ -2090,6 +2101,7 @@ static int cpuhp_smt_enable(void)
cpu_maps_update_begin();
cpu_smt_control = CPU_SMT_ENABLED;
arch_smt_update();
for_each_present_cpu(cpu) {
/* Skip online CPUs and CPUs on offline nodes */
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))


@@ -79,7 +79,7 @@ int static_key_count(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_count);
static void static_key_slow_inc_cpuslocked(struct static_key *key)
void static_key_slow_inc_cpuslocked(struct static_key *key)
{
int v, v1;
@@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
static void static_key_slow_dec_cpuslocked(struct static_key *key,
static void __static_key_slow_dec_cpuslocked(struct static_key *key,
unsigned long rate_limit,
struct delayed_work *work)
{
@@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key,
struct delayed_work *work)
{
cpus_read_lock();
static_key_slow_dec_cpuslocked(key, rate_limit, work);
__static_key_slow_dec_cpuslocked(key, rate_limit, work);
cpus_read_unlock();
}
@@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_slow_dec);
void static_key_slow_dec_cpuslocked(struct static_key *key)
{
STATIC_KEY_CHECK_USE();
__static_key_slow_dec_cpuslocked(key, 0, NULL);
}
void static_key_slow_dec_deferred(struct static_key_deferred *key)
{
STATIC_KEY_CHECK_USE();


@@ -5680,15 +5680,10 @@ int sched_cpu_activate(unsigned int cpu)
#ifdef CONFIG_SCHED_SMT
/*
* The sched_smt_present static key needs to be evaluated on every
* hotplug event because at boot time SMT might be disabled when
* the number of booted CPUs is limited.
*
* If then later a sibling gets hotplugged, then the key would stay
* off and SMT scheduling would never be functional.
* When going up, increment the number of cores with SMT present.
*/
if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
static_branch_enable_cpuslocked(&sched_smt_present);
if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
static_branch_inc_cpuslocked(&sched_smt_present);
#endif
set_cpu_active(cpu, true);
@@ -5732,6 +5727,14 @@ int sched_cpu_deactivate(unsigned int cpu)
*/
synchronize_rcu_mult(call_rcu, call_rcu_sched);
#ifdef CONFIG_SCHED_SMT
/*
* When going down, decrement the number of cores with SMT present.
*/
if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
static_branch_dec_cpuslocked(&sched_smt_present);
#endif
if (!sched_smp_initialized)
return 0;


@@ -4311,12 +4311,12 @@ static inline bool cfs_bandwidth_used(void)
void cfs_bandwidth_usage_inc(void)
{
static_key_slow_inc(&__cfs_bandwidth_used);
static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
}
void cfs_bandwidth_usage_dec(void)
{
static_key_slow_dec(&__cfs_bandwidth_used);
static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
}
#else /* HAVE_JUMP_LABEL */
static bool cfs_bandwidth_used(void)


@@ -20,6 +20,7 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sched/init.h>
#include <linux/sched/smt.h>
#include <linux/u64_stats_sync.h>
#include <linux/kernel_stat.h>
@@ -865,9 +866,6 @@ static inline int cpu_of(struct rq *rq)
#ifdef CONFIG_SCHED_SMT
extern struct static_key_false sched_smt_present;
extern void __update_idle_core(struct rq *rq);
static inline void update_idle_core(struct rq *rq)

Some files were not shown because too many files have changed in this diff.