Merge 4.14.86 into android-4.14-p
Changes in 4.14.86
mm/huge_memory: rename freeze_page() to unmap_page()
mm/huge_memory.c: reorder operations in __split_huge_page_tail()
mm/huge_memory: splitting set mapping+index before unfreeze
mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
mm/khugepaged: collapse_shmem() stop if punched or truncated
mm/khugepaged: fix crashes due to misaccounted holes
mm/khugepaged: collapse_shmem() remember to clear holes
mm/khugepaged: minor reorderings in collapse_shmem()
mm/khugepaged: collapse_shmem() without freezing new_page
mm/khugepaged: collapse_shmem() do not crash on Compound
media: em28xx: Fix use-after-free when disconnecting
ubi: Initialize Fastmap checkmapping correctly
libceph: store ceph_auth_handshake pointer in ceph_connection
libceph: factor out __prepare_write_connect()
libceph: factor out __ceph_x_decrypt()
libceph: factor out encrypt_authorizer()
libceph: add authorizer challenge
libceph: implement CEPHX_V2 calculation mode
bpf: Prevent memory disambiguation attack
tls: Add function to update the TLS socket configuration
tls: Fix TLS ulp context leak, when TLS_TX setsockopt is not used.
tls: Avoid copying crypto_info again after cipher_type check.
tls: don't override sk_write_space if tls_set_sw_offload fails.
tls: Use correct sk->sk_prot for IPV6
net/tls: Fixed return value when tls_complete_pending_work() fails
wil6210: missing length check in wmi_set_ie
btrfs: validate type when reading a chunk
btrfs: Verify that every chunk has corresponding block group at mount time
btrfs: Refactor check_leaf function for later expansion
btrfs: Check if item pointer overlaps with the item itself
btrfs: Add sanity check for EXTENT_DATA when reading out leaf
btrfs: Add checker for EXTENT_CSUM
btrfs: Move leaf and node validation checker to tree-checker.c
btrfs: tree-checker: Enhance btrfs_check_node output
btrfs: tree-checker: Fix false panic for sanity test
btrfs: tree-checker: Add checker for dir item
btrfs: tree-checker: use %zu format string for size_t
btrfs: tree-check: reduce stack consumption in check_dir_item
btrfs: tree-checker: Verify block_group_item
btrfs: tree-checker: Detect invalid and empty essential trees
btrfs: Check that each block group has corresponding chunk at mount time
btrfs: tree-checker: Check level for leaves and nodes
btrfs: tree-checker: Fix misleading group system information
f2fs: check blkaddr more accuratly before issue a bio
f2fs: sanity check on sit entry
f2fs: enhance sanity_check_raw_super() to avoid potential overflow
f2fs: clean up with is_valid_blkaddr()
f2fs: introduce and spread verify_blkaddr
f2fs: fix to do sanity check with secs_per_zone
f2fs: Add sanity_check_inode() function
f2fs: fix to do sanity check with extra_attr feature
f2fs: fix to do sanity check with user_block_count
f2fs: fix to do sanity check with node footer and iblocks
f2fs: fix to do sanity check with block address in main area
f2fs: fix to do sanity check with i_extra_isize
f2fs: fix to do sanity check with cp_pack_start_sum
xfs: don't fail when converting shortform attr to long form during ATTR_REPLACE
Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()"
net: skb_scrub_packet(): Scrub offload_fwd_mark
net: thunderx: set xdp_prog to NULL if bpf_prog_add fails
virtio-net: disable guest csum during XDP set
virtio-net: fail XDP set if guest csum is negotiated
net: thunderx: set tso_hdrs pointer to NULL in nicvf_free_snd_queue
packet: copy user buffers before orphan or clone
rapidio/rionet: do not free skb before reading its length
s390/qeth: fix length check in SNMP processing
usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2
sched/core: Fix cpu.max vs. cpuhotplug deadlock
x86/bugs: Add AMD's variant of SSB_NO
x86/bugs: Add AMD's SPEC_CTRL MSR usage
x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features
x86/bugs: Update when to check for the LS_CFG SSBD mitigation
x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR
x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
x86/speculation: Propagate information about RSB filling mitigation to sysfs
x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support
x86/retpoline: Remove minimal retpoline support
x86/speculation: Update the TIF_SSBD comment
x86/speculation: Clean up spectre_v2_parse_cmdline()
x86/speculation: Remove unnecessary ret variable in cpu_show_common()
x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common()
x86/speculation: Disable STIBP when enhanced IBRS is in use
x86/speculation: Rename SSBD update functions
x86/speculation: Reorganize speculation control MSRs update
sched/smt: Make sched_smt_present track topology
x86/Kconfig: Select SCHED_SMT if SMP enabled
sched/smt: Expose sched_smt_present static key
x86/speculation: Rework SMT state change
x86/l1tf: Show actual SMT state
x86/speculation: Reorder the spec_v2 code
x86/speculation: Mark string arrays const correctly
x86/speculataion: Mark command line parser data __initdata
x86/speculation: Unify conditional spectre v2 print functions
x86/speculation: Add command line control for indirect branch speculation
x86/speculation: Prepare for per task indirect branch speculation control
x86/process: Consolidate and simplify switch_to_xtra() code
x86/speculation: Avoid __switch_to_xtra() calls
x86/speculation: Prepare for conditional IBPB in switch_mm()
ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS
x86/speculation: Split out TIF update
x86/speculation: Prevent stale SPEC_CTRL msr content
x86/speculation: Prepare arch_smt_update() for PRCTL mode
x86/speculation: Add prctl() control for indirect branch speculation
x86/speculation: Enable prctl mode for spectre_v2_user
x86/speculation: Add seccomp Spectre v2 user space protection mode
x86/speculation: Provide IBPB always command line options
kvm: mmu: Fix race in emulated page table writes
kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb
KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall
KVM: X86: Fix scan ioapic use-before-initialization
xtensa: enable coprocessors that are being flushed
xtensa: fix coprocessor context offset definitions
xtensa: fix coprocessor part of ptrace_{get,set}xregs
Btrfs: ensure path name is null terminated at btrfs_control_ioctl
btrfs: relocation: set trans to be NULL after ending transaction
PCI: layerscape: Fix wrong invocation of outbound window disable accessor
arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou.
x86/MCE/AMD: Fix the thresholding machinery initialization order
x86/fpu: Disable bottom halves while loading FPU registers
perf/x86/intel: Move branch tracing setup to the Intel-specific source file
perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts()
fs: fix lost error code in dio_complete
ALSA: wss: Fix invalid snd_free_pages() at error path
ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write
ALSA: control: Fix race between adding and removing a user element
ALSA: sparc: Fix invalid snd_free_pages() at error path
ALSA: hda/realtek - Support ALC300
ALSA: hda/realtek - fix headset mic detection for MSI MS-B171
ext2: fix potential use after free
ARM: dts: rockchip: Remove @0 from the veyron memory node
dmaengine: at_hdmac: fix memory leak in at_dma_xlate()
dmaengine: at_hdmac: fix module unloading
btrfs: release metadata before running delayed refs
staging: vchiq_arm: fix compat VCHIQ_IOC_AWAIT_COMPLETION
staging: rtl8723bs: Add missing return for cfg80211_rtw_get_station
USB: usb-storage: Add new IDs to ums-realtek
usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series
Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid"
iio:st_magn: Fix enable device after trigger
lib/test_kmod.c: fix rmmod double free
mm: use swp_offset as key in shmem_replace_page()
Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl()
misc: mic/scif: fix copy-paste error in scif_create_remote_lookup
binder: fix race that allows malicious free of live buffer
libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
libceph: check authorizer reply/challenge length before reading
f2fs: fix missing up_read
Linux 4.14.86
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
@@ -3997,9 +3997,13 @@

	spectre_v2=	[X86] Control mitigation of Spectre variant 2
			(indirect branch speculation) vulnerability.
			The default operation protects the kernel from
			user space attacks.

			on   - unconditionally enable
			off  - unconditionally disable
			on   - unconditionally enable, implies
			       spectre_v2_user=on
			off  - unconditionally disable, implies
			       spectre_v2_user=off
			auto - kernel detects whether your CPU model is
			       vulnerable

@@ -4009,6 +4013,12 @@
			CONFIG_RETPOLINE configuration option, and the
			compiler with which the kernel was built.

			Selecting 'on' will also enable the mitigation
			against user space to user space task attacks.

			Selecting 'off' will disable both the kernel and
			the user space protections.

			Specific mitigations can also be selected manually:

			retpoline - replace indirect branches
@@ -4018,6 +4028,48 @@
			Not specifying this option is equivalent to
			spectre_v2=auto.

	spectre_v2_user=
			[X86] Control mitigation of Spectre variant 2
			(indirect branch speculation) vulnerability between
			user space tasks

			on      - Unconditionally enable mitigations. Is
			          enforced by spectre_v2=on

			off     - Unconditionally disable mitigations. Is
			          enforced by spectre_v2=off

			prctl   - Indirect branch speculation is enabled,
			          but mitigation can be enabled via prctl
			          per thread. The mitigation control state
			          is inherited on fork.

			prctl,ibpb
			        - Like "prctl" above, but only STIBP is
			          controlled per thread. IBPB is issued
			          always when switching between different user
			          space processes.

			seccomp
			        - Same as "prctl" above, but all seccomp
			          threads will enable the mitigation unless
			          they explicitly opt out.

			seccomp,ibpb
			        - Like "seccomp" above, but only STIBP is
			          controlled per thread. IBPB is issued
			          always when switching between different
			          user space processes.

			auto    - Kernel selects the mitigation depending on
			          the available CPU features and vulnerability.

			Default mitigation:
			If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"

			Not specifying this option is equivalent to
			spectre_v2_user=auto.
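As a usage illustration (the kernel image path and bootloader syntax here are assumptions for the example, not part of this patch), the new switch is passed on the kernel command line like any other parameter, e.g. in a GRUB linux entry:

	linux /boot/vmlinuz-4.14.86 root=/dev/sda1 ro spectre_v2_user=prctl

Booted this way, STIBP stays off globally and individual tasks opt in to the protection through the prctl() interface documented below.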
	spec_store_bypass_disable=
			[HW] Control Speculative Store Bypass (SSB) Disable mitigation
			(Speculative Store Bypass vulnerability)

@@ -92,3 +92,12 @@ Speculation misfeature controls
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);

- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
  (Mitigate Spectre V2 style attacks against user processes)

  Invocations:
 * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
 * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
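To make the interface above concrete, here is a minimal user-space sketch (assuming kernel headers new enough to carry these PR_SPEC_* constants in <linux/prctl.h>; everything else follows the invocations listed above):

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		/* Query the current indirect branch speculation state first. */
		long state = prctl(PR_GET_SPECULATION_CTRL,
				   PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

		if (state < 0) {
			perror("PR_GET_SPECULATION_CTRL");
			return 1;
		}

		/* Opt this task out of indirect branch speculation; the
		 * state is inherited on fork, so children started after
		 * this point are covered too. */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
			  PR_SPEC_DISABLE, 0, 0)) {
			perror("PR_SET_SPECULATION_CTRL");
			return 1;
		}
		return 0;
	}

Per the ib_prctl_set() hunk further down, the PR_SPEC_DISABLE call fails with EPERM when the mitigation was force-disabled on the command line (spectre_v2_user=off).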
 Makefile | 2 +-
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
SUBLEVEL = 85
SUBLEVEL = 86
EXTRAVERSION =
NAME = Petit Gorille

@@ -47,7 +47,11 @@
#include "rk3288.dtsi"

/ {
	memory@0 {
	/*
	 * The default coreboot on veyron devices ignores memory@0 nodes
	 * and would instead create another memory node.
	 */
	memory {
		device_type = "memory";
		reg = <0x0 0x0 0x0 0x80000000>;
	};

@@ -130,7 +130,7 @@
};

&pcie0 {
	ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
	ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
	num-lanes = <4>;
	pinctrl-names = "default";
	pinctrl-0 = <&pcie_clkreqn_cpm>;

@@ -440,10 +440,6 @@ config RETPOLINE
	  branches. Requires a compiler with -mindirect-branch=thunk-extern
	  support for full protection. The kernel may run slower.

	  Without compiler support, at least indirect branches in assembler
	  code are eliminated. Since this includes the syscall entry path,
	  it is not entirely pointless.

config INTEL_RDT
	bool "Intel Resource Director Technology support"
	default n
@@ -959,13 +955,7 @@ config NR_CPUS
	  approximately eight kilobytes to the kernel image.

config SCHED_SMT
	bool "SMT (Hyperthreading) scheduler support"
	depends on SMP
	---help---
	  SMT scheduler support improves the CPU scheduler's decision making
	  when dealing with Intel Pentium 4 chips with HyperThreading at a
	  cost of slightly increased overhead in some places. If unsure say
	  N here.
	def_bool y if SMP

config SCHED_MC
	def_bool y

@@ -243,9 +243,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables

# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
ifneq ($(RETPOLINE_CFLAGS),)
  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
ifeq ($(RETPOLINE_CFLAGS),)
  $(error You are building kernel with non-retpoline compiler, please update your compiler.)
endif
  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
endif

archscripts: scripts_basic

@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
	if (config == -1LL)
		return -EINVAL;

	/*
	 * Branch tracing:
	 */
	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
	    !attr->freq && hwc->sample_period == 1) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;

		/* disallow bts if conflicting events are present */
		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
			return -EBUSY;

		event->destroy = hw_perf_lbr_event_destroy;
	}

	hwc->config |= config;

	return 0;

@@ -2345,16 +2345,7 @@ done:
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	if (event->attr.freq)
		return NULL;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
	if (unlikely(intel_pmu_has_bts(event)))
		return &bts_constraint;

	return NULL;
@@ -2973,10 +2964,47 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
	return flags;
}

static int intel_pmu_bts_config(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;

	if (unlikely(intel_pmu_has_bts(event))) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;

		/* disallow bts if conflicting events are present */
		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
			return -EBUSY;

		event->destroy = hw_perf_lbr_event_destroy;
	}

	return 0;
}

static int core_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	return intel_pmu_bts_config(event);
}

static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	ret = intel_pmu_bts_config(event);
	if (ret)
		return ret;

@@ -2999,7 +3027,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
	/*
	 * BTS is set up earlier in this path, so don't account twice
	 */
	if (!intel_pmu_has_bts(event)) {
	if (!unlikely(intel_pmu_has_bts(event))) {
		/* disallow lbr if conflicting events are present */
		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
			return -EBUSY;
@@ -3462,7 +3490,7 @@ static __initconst const struct x86_pmu core_pmu = {
	.enable_all = core_pmu_enable_all,
	.enable = core_pmu_enable_event,
	.disable = x86_pmu_disable_event,
	.hw_config = x86_pmu_hw_config,
	.hw_config = core_pmu_hw_config,
	.schedule_events = x86_schedule_events,
	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,

@@ -850,11 +850,16 @@ static inline int amd_pmu_init(void)

static inline bool intel_pmu_has_bts(struct perf_event *event)
{
	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
	    !event->attr.freq && event->hw.sample_period == 1)
		return true;
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	return false;
	if (event->attr.freq)
		return false;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	return hw_event == bts_event && hwc->sample_period == 1;
}

int intel_pmu_save_and_restart(struct perf_event *event);

@@ -284,7 +284,9 @@
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */

/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */

@@ -41,9 +41,10 @@

#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */

#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */

@@ -3,6 +3,8 @@
#ifndef _ASM_X86_NOSPEC_BRANCH_H_
#define _ASM_X86_NOSPEC_BRANCH_H_

#include <linux/static_key.h>

#include <asm/alternative.h>
#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
@@ -162,29 +164,35 @@
	_ASM_PTR " 999b\n\t" \
	".popsection\n\t"

#if defined(CONFIG_X86_64) && defined(RETPOLINE)
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64

/*
 * Since the inline asm uses the %V modifier which is only in newer GCC,
 * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
 * Inline asm uses the %V modifier which is only in newer GCC
 * which is ensured when CONFIG_RETPOLINE is defined.
 */
# define CALL_NOSPEC \
	ANNOTATE_NOSPEC_ALTERNATIVE \
	ALTERNATIVE( \
	ALTERNATIVE_2( \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	"call __x86_indirect_thunk_%V[thunk_target]\n", \
	X86_FEATURE_RETPOLINE)
	X86_FEATURE_RETPOLINE, \
	"lfence;\n" \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	X86_FEATURE_RETPOLINE_AMD)
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)

#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
#else /* CONFIG_X86_32 */
/*
 * For i386 we use the original ret-equivalent retpoline, because
 * otherwise we'll run out of registers. We don't care about CET
 * here, anyway.
 */
# define CALL_NOSPEC \
	ALTERNATIVE( \
	ANNOTATE_NOSPEC_ALTERNATIVE \
	ALTERNATIVE_2( \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	" jmp 904f;\n" \
@@ -199,9 +207,14 @@
	" ret;\n" \
	" .align 16\n" \
	"904: call 901b;\n", \
	X86_FEATURE_RETPOLINE)
	X86_FEATURE_RETPOLINE, \
	"lfence;\n" \
	ANNOTATE_RETPOLINE_SAFE \
	"call *%[thunk_target]\n", \
	X86_FEATURE_RETPOLINE_AMD)

# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
#endif
#else /* No retpoline for C / inline asm */
# define CALL_NOSPEC "call *%[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
@@ -210,14 +223,19 @@
/* The Spectre V2 mitigation variants */
enum spectre_v2_mitigation {
	SPECTRE_V2_NONE,
	SPECTRE_V2_RETPOLINE_MINIMAL,
	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
	SPECTRE_V2_RETPOLINE_GENERIC,
	SPECTRE_V2_RETPOLINE_AMD,
	SPECTRE_V2_IBRS,
	SPECTRE_V2_IBRS_ENHANCED,
};

/* The indirect branch speculation control variants */
enum spectre_v2_user_mitigation {
	SPECTRE_V2_USER_NONE,
	SPECTRE_V2_USER_STRICT,
	SPECTRE_V2_USER_PRCTL,
	SPECTRE_V2_USER_SECCOMP,
};

/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
	SPEC_STORE_BYPASS_NONE,
@@ -295,6 +313,10 @@ do { \
	preempt_enable(); \
} while (0)

DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

#endif /* __ASSEMBLY__ */

/*

@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
{
	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
	return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}

static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
{
	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
	return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
}

static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
{
	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
	return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
}

static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
{
	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
@@ -70,11 +82,7 @@ extern void speculative_store_bypass_ht_init(void);
static inline void speculative_store_bypass_ht_init(void) { }
#endif

extern void speculative_store_bypass_update(unsigned long tif);

static inline void speculative_store_bypass_update_current(void)
{
	speculative_store_bypass_update(current_thread_info()->flags);
}
extern void speculation_ctrl_update(unsigned long tif);
extern void speculation_ctrl_update_current(void);

#endif

@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,

__visible struct task_struct *__switch_to(struct task_struct *prev,
					  struct task_struct *next);
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss);

/* This runs runs on the previous thread's stack. */
static inline void prepare_switch_to(struct task_struct *prev,

@@ -81,10 +81,12 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SSBD 5 /* Speculative store bypass disable */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -112,6 +114,8 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
@@ -147,8 +151,18 @@ struct thread_info {
	 _TIF_FSCHECK)

/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_BASE \
	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)

/*
 * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
 */
#ifdef CONFIG_SMP
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
#else
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif

#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)

@@ -185,10 +185,14 @@ struct tlb_state {

#define LOADED_MM_SWITCHING ((struct mm_struct *)1)

	/* Last user mm for optimizing IBPB */
	union {
		struct mm_struct *last_user_mm;
		unsigned long last_user_mm_ibpb;
	};

	u16 loaded_mm_asid;
	u16 next_asid;
	/* last user mm's ctx id */
	u64 last_ctx_id;

	/*
	 * We can be in one of several states:

@@ -554,7 +554,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
		nodes_per_socket = ((value >> 3) & 7) + 1;
	}

	if (c->x86 >= 0x15 && c->x86 <= 0x17) {
	if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
	    !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
	    c->x86 >= 0x15 && c->x86 <= 0x17) {
		unsigned int bit;

		switch (c->x86) {

@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/smt.h>

#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -34,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);

/*
 * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
 * writes to SPEC_CTRL contain whatever reserved bits have been set.
 */
u64 __ro_after_init x86_spec_ctrl_base;
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
static DEFINE_MUTEX(spec_ctrl_mutex);

/*
 * The vendor and possibly platform specific bits which can be modified in
@@ -54,6 +53,13 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;

/* Control conditional STIPB in switch_to() */
DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
/* Control conditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);

void __init check_bugs(void)
{
	identify_boot_cpu();
@@ -124,31 +130,6 @@ void __init check_bugs(void)
#endif
}

/* The kernel command line selection */
enum spectre_v2_mitigation_cmd {
	SPECTRE_V2_CMD_NONE,
	SPECTRE_V2_CMD_AUTO,
	SPECTRE_V2_CMD_FORCE,
	SPECTRE_V2_CMD_RETPOLINE,
	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
	SPECTRE_V2_CMD_RETPOLINE_AMD,
};

static const char *spectre_v2_strings[] = {
	[SPECTRE_V2_NONE] = "Vulnerable",
	[SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
	[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
	[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
	[SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};

#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt

static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
	SPECTRE_V2_NONE;

void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
@@ -166,9 +147,14 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;

		/* SSBD controlled in MSR_SPEC_CTRL */
		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
		    static_cpu_has(X86_FEATURE_AMD_SSBD))
			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);

		/* Conditional STIBP enabled? */
		if (static_branch_unlikely(&switch_to_cond_stibp))
			hostval |= stibp_tif_to_spec_ctrl(ti->flags);

		if (hostval != guestval) {
			msrval = setguest ? guestval : hostval;
			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -202,7 +188,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
				 ssbd_spec_ctrl_to_tif(hostval);

		speculative_store_bypass_update(tif);
		speculation_ctrl_update(tif);
	}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@@ -217,6 +203,15 @@ static void x86_amd_ssb_disable(void)
		wrmsrl(MSR_AMD64_LS_CFG, msrval);
}

#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt

static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
	SPECTRE_V2_NONE;

static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
	SPECTRE_V2_USER_NONE;

#ifdef RETPOLINE
static bool spectre_v2_bad_module;

@@ -238,23 +233,6 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif

static void __init spec2_print_if_insecure(const char *reason)
{
	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
		pr_info("%s selected on command line.\n", reason);
}

static void __init spec2_print_if_secure(const char *reason)
{
	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
		pr_info("%s selected on command line.\n", reason);
}

static inline bool retp_compiler(void)
{
	return __is_defined(RETPOLINE);
}

static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt);
@@ -262,43 +240,210 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
	return len == arglen && !strncmp(arg, opt, len);
}

/* The kernel command line selection for spectre v2 */
enum spectre_v2_mitigation_cmd {
	SPECTRE_V2_CMD_NONE,
	SPECTRE_V2_CMD_AUTO,
	SPECTRE_V2_CMD_FORCE,
	SPECTRE_V2_CMD_RETPOLINE,
	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
	SPECTRE_V2_CMD_RETPOLINE_AMD,
};

enum spectre_v2_user_cmd {
	SPECTRE_V2_USER_CMD_NONE,
	SPECTRE_V2_USER_CMD_AUTO,
	SPECTRE_V2_USER_CMD_FORCE,
	SPECTRE_V2_USER_CMD_PRCTL,
	SPECTRE_V2_USER_CMD_PRCTL_IBPB,
	SPECTRE_V2_USER_CMD_SECCOMP,
	SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
};

static const char * const spectre_v2_user_strings[] = {
	[SPECTRE_V2_USER_NONE] = "User space: Vulnerable",
	[SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection",
	[SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl",
	[SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl",
};

static const struct {
	const char *option;
	enum spectre_v2_user_cmd cmd;
	bool secure;
} v2_user_options[] __initdata = {
	{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
	{ "off", SPECTRE_V2_USER_CMD_NONE, false },
	{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
	{ "prctl", SPECTRE_V2_USER_CMD_PRCTL, false },
	{ "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false },
	{ "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false },
	{ "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false },
};

static void __init spec_v2_user_print_cond(const char *reason, bool secure)
{
	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
		pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}

static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
{
	char arg[20];
	int ret, i;

	switch (v2_cmd) {
	case SPECTRE_V2_CMD_NONE:
		return SPECTRE_V2_USER_CMD_NONE;
	case SPECTRE_V2_CMD_FORCE:
		return SPECTRE_V2_USER_CMD_FORCE;
	default:
		break;
	}

	ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
				  arg, sizeof(arg));
	if (ret < 0)
		return SPECTRE_V2_USER_CMD_AUTO;

	for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
		if (match_option(arg, ret, v2_user_options[i].option)) {
			spec_v2_user_print_cond(v2_user_options[i].option,
						v2_user_options[i].secure);
			return v2_user_options[i].cmd;
		}
	}

	pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
	return SPECTRE_V2_USER_CMD_AUTO;
}

static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
	enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
	bool smt_possible = IS_ENABLED(CONFIG_SMP);
	enum spectre_v2_user_cmd cmd;

	if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
		return;

	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
	    cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		smt_possible = false;

	cmd = spectre_v2_parse_user_cmdline(v2_cmd);
	switch (cmd) {
	case SPECTRE_V2_USER_CMD_NONE:
		goto set_mode;
	case SPECTRE_V2_USER_CMD_FORCE:
		mode = SPECTRE_V2_USER_STRICT;
		break;
	case SPECTRE_V2_USER_CMD_PRCTL:
	case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
		mode = SPECTRE_V2_USER_PRCTL;
		break;
	case SPECTRE_V2_USER_CMD_AUTO:
	case SPECTRE_V2_USER_CMD_SECCOMP:
	case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
		if (IS_ENABLED(CONFIG_SECCOMP))
			mode = SPECTRE_V2_USER_SECCOMP;
		else
			mode = SPECTRE_V2_USER_PRCTL;
		break;
	}

	/* Initialize Indirect Branch Prediction Barrier */
	if (boot_cpu_has(X86_FEATURE_IBPB)) {
		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);

		switch (cmd) {
		case SPECTRE_V2_USER_CMD_FORCE:
		case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
		case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
			static_branch_enable(&switch_mm_always_ibpb);
			break;
		case SPECTRE_V2_USER_CMD_PRCTL:
		case SPECTRE_V2_USER_CMD_AUTO:
		case SPECTRE_V2_USER_CMD_SECCOMP:
			static_branch_enable(&switch_mm_cond_ibpb);
			break;
		default:
			break;
		}

		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
			static_key_enabled(&switch_mm_always_ibpb) ?
			"always-on" : "conditional");
	}

	/* If enhanced IBRS is enabled no STIPB required */
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return;

	/*
	 * If SMT is not possible or STIBP is not available clear the STIPB
	 * mode.
	 */
	if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
		mode = SPECTRE_V2_USER_NONE;
set_mode:
	spectre_v2_user = mode;
	/* Only print the STIBP mode when SMT possible */
	if (smt_possible)
		pr_info("%s\n", spectre_v2_user_strings[mode]);
}

static const char * const spectre_v2_strings[] = {
	[SPECTRE_V2_NONE] = "Vulnerable",
	[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
	[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
	[SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
};

static const struct {
	const char *option;
	enum spectre_v2_mitigation_cmd cmd;
	bool secure;
} mitigation_options[] = {
	{ "off", SPECTRE_V2_CMD_NONE, false },
	{ "on", SPECTRE_V2_CMD_FORCE, true },
	{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
	{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
	{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
	{ "auto", SPECTRE_V2_CMD_AUTO, false },
} mitigation_options[] __initdata = {
	{ "off", SPECTRE_V2_CMD_NONE, false },
	{ "on", SPECTRE_V2_CMD_FORCE, true },
	{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
	{ "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
	{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
	{ "auto", SPECTRE_V2_CMD_AUTO, false },
};

static void __init spec_v2_print_cond(const char *reason, bool secure)
{
	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
		pr_info("%s selected on command line.\n", reason);
}

static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
	char arg[20];
	int ret, i;
	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;

	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
		return SPECTRE_V2_CMD_NONE;
	else {
		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
		if (ret < 0)
			return SPECTRE_V2_CMD_AUTO;

		for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
			if (!match_option(arg, ret, mitigation_options[i].option))
				continue;
			cmd = mitigation_options[i].cmd;
			break;
		}
	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
	if (ret < 0)
		return SPECTRE_V2_CMD_AUTO;

		if (i >= ARRAY_SIZE(mitigation_options)) {
			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
			return SPECTRE_V2_CMD_AUTO;
		}
	for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
		if (!match_option(arg, ret, mitigation_options[i].option))
			continue;
		cmd = mitigation_options[i].cmd;
		break;
	}

	if (i >= ARRAY_SIZE(mitigation_options)) {
		pr_err("unknown option (%s). Switching to AUTO select\n", arg);
		return SPECTRE_V2_CMD_AUTO;
	}

	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
@@ -315,11 +460,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
		return SPECTRE_V2_CMD_AUTO;
	}

	if (mitigation_options[i].secure)
		spec2_print_if_secure(mitigation_options[i].option);
	else
		spec2_print_if_insecure(mitigation_options[i].option);

	spec_v2_print_cond(mitigation_options[i].option,
			   mitigation_options[i].secure);
	return cmd;
}

@@ -375,14 +517,12 @@ retpoline_auto:
			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
			goto retpoline_generic;
		}
		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
		mode = SPECTRE_V2_RETPOLINE_AMD;
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
	} else {
retpoline_generic:
		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
					 SPECTRE_V2_RETPOLINE_MINIMAL;
		mode = SPECTRE_V2_RETPOLINE_GENERIC;
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
	}

@@ -401,12 +541,6 @@ specv2_set_mode:
	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");

	/* Initialize Indirect Branch Prediction Barrier if supported */
	if (boot_cpu_has(X86_FEATURE_IBPB)) {
		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
	}

	/*
	 * Retpoline means the kernel is safe because it has no indirect
	 * branches. Enhanced IBRS protects firmware too, so, enable restricted
@@ -422,6 +556,66 @@ specv2_set_mode:
		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
		pr_info("Enabling Restricted Speculation for firmware calls\n");
	}

	/* Set up IBPB and STIBP depending on the general spectre V2 command */
	spectre_v2_user_select_mitigation(cmd);

	/* Enable STIBP if appropriate */
	arch_smt_update();
}

static void update_stibp_msr(void * __unused)
{
	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}

/* Update x86_spec_ctrl_base in case SMT state changed. */
static void update_stibp_strict(void)
{
	u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;

	if (sched_smt_active())
		mask |= SPEC_CTRL_STIBP;

	if (mask == x86_spec_ctrl_base)
		return;

	pr_info("Update user space SMT mitigation: STIBP %s\n",
		mask & SPEC_CTRL_STIBP ? "always-on" : "off");
	x86_spec_ctrl_base = mask;
	on_each_cpu(update_stibp_msr, NULL, 1);
}

/* Update the static key controlling the evaluation of TIF_SPEC_IB */
static void update_indir_branch_cond(void)
{
	if (sched_smt_active())
		static_branch_enable(&switch_to_cond_stibp);
	else
		static_branch_disable(&switch_to_cond_stibp);
}

void arch_smt_update(void)
{
	/* Enhanced IBRS implies STIBP. No update required. */
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return;

	mutex_lock(&spec_ctrl_mutex);

	switch (spectre_v2_user) {
	case SPECTRE_V2_USER_NONE:
		break;
	case SPECTRE_V2_USER_STRICT:
		update_stibp_strict();
		break;
	case SPECTRE_V2_USER_PRCTL:
	case SPECTRE_V2_USER_SECCOMP:
		update_indir_branch_cond();
		break;
	}

	mutex_unlock(&spec_ctrl_mutex);
}

#undef pr_fmt
@@ -438,7 +632,7 @@ enum ssb_mitigation_cmd {
	SPEC_STORE_BYPASS_CMD_SECCOMP,
};

static const char *ssb_strings[] = {
static const char * const ssb_strings[] = {
	[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
	[SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
	[SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
@@ -448,7 +642,7 @@ static const char *ssb_strings[] = {
static const struct {
	const char *option;
	enum ssb_mitigation_cmd cmd;
} ssb_mitigation_options[] = {
} ssb_mitigation_options[] __initdata = {
	{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
	{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
	{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -532,18 +726,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
	if (mode == SPEC_STORE_BYPASS_DISABLE) {
		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
		/*
		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
		 * a completely different MSR and bit dependent on family.
		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
		 * use a completely different MSR and bit dependent on family.
		 */
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_INTEL:
		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
		    !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
			x86_amd_ssb_disable();
		} else {
			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
			break;
		case X86_VENDOR_AMD:
			x86_amd_ssb_disable();
			break;
		}
	}

@@ -561,10 +753,25 @@ static void ssb_select_mitigation(void)
#undef pr_fmt
#define pr_fmt(fmt) "Speculation prctl: " fmt

static void task_update_spec_tif(struct task_struct *tsk)
{
	/* Force the update of the real TIF bits */
	set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);

	/*
	 * Immediately update the speculation control MSRs for the current
	 * task, but for a non-current task delay setting the CPU
	 * mitigation until it is scheduled next.
	 *
	 * This can only happen for SECCOMP mitigation. For PRCTL it's
	 * always the current task.
	 */
	if (tsk == current)
		speculation_ctrl_update_current();
}

static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
	bool update;

	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
		return -ENXIO;
@@ -575,28 +782,56 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
		if (task_spec_ssb_force_disable(task))
			return -EPERM;
		task_clear_spec_ssb_disable(task);
		update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
		task_update_spec_tif(task);
		break;
	case PR_SPEC_DISABLE:
		task_set_spec_ssb_disable(task);
		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
		task_update_spec_tif(task);
		break;
	case PR_SPEC_FORCE_DISABLE:
		task_set_spec_ssb_disable(task);
		task_set_spec_ssb_force_disable(task);
		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
		task_update_spec_tif(task);
		break;
	default:
		return -ERANGE;
	}
	return 0;
}

	/*
	 * If being set on non-current task, delay setting the CPU
	 * mitigation until it is next scheduled.
	 */
	if (task == current && update)
		speculative_store_bypass_update_current();

static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
	switch (ctrl) {
	case PR_SPEC_ENABLE:
		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
			return 0;
		/*
		 * Indirect branch speculation is always disabled in strict
		 * mode.
		 */
		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
			return -EPERM;
		task_clear_spec_ib_disable(task);
		task_update_spec_tif(task);
		break;
	case PR_SPEC_DISABLE:
	case PR_SPEC_FORCE_DISABLE:
		/*
		 * Indirect branch speculation is always allowed when
		 * mitigation is force disabled.
		 */
		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
			return -EPERM;
		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
			return 0;
		task_set_spec_ib_disable(task);
		if (ctrl == PR_SPEC_FORCE_DISABLE)
			task_set_spec_ib_force_disable(task);
		task_update_spec_tif(task);
		break;
	default:
		return -ERANGE;
	}
	return 0;
}

@@ -606,6 +841,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
	switch (which) {
	case PR_SPEC_STORE_BYPASS:
		return ssb_prctl_set(task, ctrl);
	case PR_SPEC_INDIRECT_BRANCH:
		return ib_prctl_set(task, ctrl);
	default:
		return -ENODEV;
	}
@@ -616,6 +853,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
	if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
		ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif

@@ -638,11 +877,35 @@ static int ssb_prctl_get(struct task_struct *task)
	}
}

static int ib_prctl_get(struct task_struct *task)
{
	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
		return PR_SPEC_NOT_AFFECTED;

	switch (spectre_v2_user) {
	case SPECTRE_V2_USER_NONE:
		return PR_SPEC_ENABLE;
	case SPECTRE_V2_USER_PRCTL:
	case SPECTRE_V2_USER_SECCOMP:
		if (task_spec_ib_force_disable(task))
			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
		if (task_spec_ib_disable(task))
			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
	case SPECTRE_V2_USER_STRICT:
		return PR_SPEC_DISABLE;
	default:
		return PR_SPEC_NOT_AFFECTED;
	}
}

int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
	switch (which) {
	case PR_SPEC_STORE_BYPASS:
		return ssb_prctl_get(task);
	case PR_SPEC_INDIRECT_BRANCH:
		return ib_prctl_get(task);
	default:
		return -ENODEV;
	}
@@ -780,7 +1043,7 @@ early_param("l1tf", l1tf_cmdline);
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"

#if IS_ENABLED(CONFIG_KVM_INTEL)
static const char *l1tf_vmx_states[] = {
static const char * const l1tf_vmx_states[] = {
	[VMENTER_L1D_FLUSH_AUTO] = "auto",
	[VMENTER_L1D_FLUSH_NEVER] = "vulnerable",
	[VMENTER_L1D_FLUSH_COND] = "conditional cache flushes",
@@ -796,13 +1059,14 @@ static ssize_t l1tf_show_state(char *buf)

	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
	    (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
	     cpu_smt_control == CPU_SMT_ENABLED))
	     sched_smt_active())) {
		return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
			       l1tf_vmx_states[l1tf_vmx_mitigation]);
	}

	return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
		       l1tf_vmx_states[l1tf_vmx_mitigation],
		       cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
		       sched_smt_active() ? "vulnerable" : "disabled");
}
#else
static ssize_t l1tf_show_state(char *buf)
@@ -811,6 +1075,36 @@ static ssize_t l1tf_show_state(char *buf)
}
#endif

static char *stibp_state(void)
{
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return "";

	switch (spectre_v2_user) {
	case SPECTRE_V2_USER_NONE:
		return ", STIBP: disabled";
	case SPECTRE_V2_USER_STRICT:
		return ", STIBP: forced";
	case SPECTRE_V2_USER_PRCTL:
	case SPECTRE_V2_USER_SECCOMP:
		if (static_key_enabled(&switch_to_cond_stibp))
			return ", STIBP: conditional";
	}
	return "";
}

static char *ibpb_state(void)
{
	if (boot_cpu_has(X86_FEATURE_IBPB)) {
		if (static_key_enabled(&switch_mm_always_ibpb))
			return ", IBPB: always-on";
		if (static_key_enabled(&switch_mm_cond_ibpb))
			return ", IBPB: conditional";
		return ", IBPB: disabled";
	}
	return "";
}

static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
			       char *buf, unsigned int bug)
{
@@ -828,9 +1122,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
		return sprintf(buf, "Mitigation: __user pointer sanitization\n");

	case X86_BUG_SPECTRE_V2:
		return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
			       ibpb_state(),
			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
			       stibp_state(),
			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
			       spectre_v2_module_string());

	case X86_BUG_SPEC_STORE_BYPASS:

@@ -760,6 +760,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
		set_cpu_cap(c, X86_FEATURE_STIBP);
		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
	}

	if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
		set_cpu_cap(c, X86_FEATURE_SSBD);
		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
		clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
	}
}

void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -958,7 +964,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
	rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
	    !(ia32_cap & ARCH_CAP_SSB_NO))
	    !(ia32_cap & ARCH_CAP_SSB_NO) &&
	    !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);

	if (x86_match_cpu(cpu_no_speculation))

@@ -56,7 +56,7 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000

static bool thresholding_en;
static bool thresholding_irq_en;

static const char * const th_names[] = {
	"load_store",
@@ -533,9 +533,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,

set_offset:
	offset = setup_APIC_mce_threshold(offset, new);

	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
		mce_threshold_vector = amd_threshold_interrupt;
	if (offset == new)
		thresholding_irq_en = true;

done:
	mce_threshold_block_init(&b, offset);
@@ -1356,9 +1355,6 @@ int mce_threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	if (!thresholding_en)
		return 0;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
@@ -1376,9 +1372,6 @@ int mce_threshold_create_device(unsigned int cpu)
	struct threshold_bank **bp;
	int err = 0;

	if (!thresholding_en)
		return 0;

	bp = per_cpu(threshold_banks, cpu);
	if (bp)
		return 0;
@@ -1407,9 +1400,6 @@ static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	if (mce_threshold_vector == amd_threshold_interrupt)
		thresholding_en = true;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = mce_threshold_create_device(lcpu);
@@ -1418,6 +1408,9 @@ static __init int threshold_init_device(void)
			return err;
	}

	if (thresholding_irq_en)
		mce_threshold_vector = amd_threshold_interrupt;

	return 0;
}
/*

@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
|
||||
}
|
||||
|
||||
local_bh_disable();
|
||||
fpu->initialized = 1;
|
||||
preempt_disable();
|
||||
fpu__restore(fpu);
|
||||
preempt_enable();
|
||||
local_bh_enable();
|
||||
|
||||
return err;
|
||||
} else {
|
||||
|
||||
@@ -41,6 +41,8 @@
|
||||
#include <asm/prctl.h>
|
||||
#include <asm/spec-ctrl.h>
|
||||
|
||||
#include "process.h"
|
||||
|
||||
/*
|
||||
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
||||
* no more per-task TSS's. The TSS size is kept cacheline-aligned
|
||||
@@ -255,11 +257,12 @@ void arch_setup_new_exec(void)
|
||||
enable_cpuid();
|
||||
}
|
||||
|
||||
static inline void switch_to_bitmap(struct tss_struct *tss,
|
||||
struct thread_struct *prev,
|
||||
static inline void switch_to_bitmap(struct thread_struct *prev,
|
||||
struct thread_struct *next,
|
||||
unsigned long tifp, unsigned long tifn)
|
||||
{
|
||||
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
|
||||
|
||||
if (tifn & _TIF_IO_BITMAP) {
|
||||
/*
|
||||
* Copy the relevant range of the IO bitmap.
|
||||
@@ -398,32 +401,85 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
|
||||
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
|
||||
}
|
||||
|
||||
static __always_inline void intel_set_ssb_state(unsigned long tifn)
|
||||
/*
|
||||
* Update the MSRs managing speculation control, during context switch.
|
||||
*
|
||||
* tifp: Previous task's thread flags
|
||||
* tifn: Next task's thread flags
|
||||
*/
|
||||
static __always_inline void __speculation_ctrl_update(unsigned long tifp,
|
||||
unsigned long tifn)
|
||||
{
|
||||
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
|
||||
unsigned long tif_diff = tifp ^ tifn;
|
||||
u64 msr = x86_spec_ctrl_base;
|
||||
bool updmsr = false;
|
||||
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
|
||||
/*
|
||||
* If TIF_SSBD is different, select the proper mitigation
|
||||
* method. Note that if SSBD mitigation is disabled or permanentely
|
||||
* enabled this branch can't be taken because nothing can set
|
||||
* TIF_SSBD.
|
||||
*/
|
||||
if (tif_diff & _TIF_SSBD) {
|
||||
if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
|
||||
amd_set_ssb_virt_state(tifn);
|
||||
} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
|
||||
amd_set_core_ssb_state(tifn);
|
||||
} else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
|
||||
static_cpu_has(X86_FEATURE_AMD_SSBD)) {
|
||||
msr |= ssbd_tif_to_spec_ctrl(tifn);
|
||||
updmsr = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
|
||||
* otherwise avoid the MSR write.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_SMP) &&
|
||||
static_branch_unlikely(&switch_to_cond_stibp)) {
|
||||
updmsr |= !!(tif_diff & _TIF_SPEC_IB);
|
||||
msr |= stibp_tif_to_spec_ctrl(tifn);
|
||||
}
|
||||
|
||||
if (updmsr)
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
|
||||
}
|
||||
|
||||
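A user-space sketch of the XOR-diff idea above, under simplified assumptions (made-up bit values, SPEC_CTRL MSR path only, no VIRT/LS_CFG branches): the MSR image is rebuilt from the incoming task's flags, but the expensive write is issued only when a flag that feeds the MSR actually differs. The forced-update path makes all bits differ by passing ~tif as the previous flags.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's TIF bits and MSR fields. */
#define TIF_SSBD        (1UL << 0)
#define TIF_SPEC_IB     (1UL << 1)
#define SPEC_CTRL_STIBP (1ULL << 1)
#define SPEC_CTRL_SSBD  (1ULL << 2)

static bool cond_stibp = true;   /* models switch_to_cond_stibp */

/* Returns true when an MSR write would be issued on this switch. */
static bool spec_ctrl_update(unsigned long tifp, unsigned long tifn,
			     uint64_t *msr)
{
	unsigned long tif_diff = tifp ^ tifn;
	bool updmsr = false;

	*msr = 0;                        /* models x86_spec_ctrl_base */
	if (tif_diff & TIF_SSBD) {       /* SSBD request changed */
		if (tifn & TIF_SSBD)
			*msr |= SPEC_CTRL_SSBD;
		updmsr = true;
	}
	if (cond_stibp) {                /* only then is TIF_SPEC_IB live */
		updmsr |= !!(tif_diff & TIF_SPEC_IB);
		if (tifn & TIF_SPEC_IB)
			*msr |= SPEC_CTRL_STIBP;
	}
	return updmsr;
}

int main(void)
{
	uint64_t msr;

	/* prev had SSBD, next wants STIBP: one write covers both fields */
	printf("write=%d msr=%#llx\n",
	       spec_ctrl_update(TIF_SSBD, TIF_SPEC_IB, &msr),
	       (unsigned long long)msr);
	return 0;
}
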
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
{
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
amd_set_ssb_virt_state(tifn);
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
amd_set_core_ssb_state(tifn);
else
intel_set_ssb_state(tifn);
if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
if (task_spec_ssb_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SSBD);
else
clear_tsk_thread_flag(tsk, TIF_SSBD);

if (task_spec_ib_disable(tsk))
set_tsk_thread_flag(tsk, TIF_SPEC_IB);
else
clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
}
/* Return the updated threadinfo flags*/
return task_thread_info(tsk)->flags;
}

void speculative_store_bypass_update(unsigned long tif)
void speculation_ctrl_update(unsigned long tif)
{
/* Forced update. Make sure all relevant TIF flags are different */
preempt_disable();
__speculative_store_bypass_update(tif);
__speculation_ctrl_update(~tif, tif);
preempt_enable();
}

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
/* Called from seccomp/prctl update */
void speculation_ctrl_update_current(void)
{
preempt_disable();
speculation_ctrl_update(speculation_ctrl_update_tif(current));
preempt_enable();
}

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
@@ -433,7 +489,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,

tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
switch_to_bitmap(prev, next, tifp, tifn);

propagate_user_return_notify(prev_p, next_p);

@@ -454,8 +510,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));

if ((tifp ^ tifn) & _TIF_SSBD)
__speculative_store_bypass_update(tifn);
if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
__speculation_ctrl_update(tifp, tifn);
} else {
speculation_ctrl_update_tif(prev_p);
tifn = speculation_ctrl_update_tif(next_p);

/* Enforce MSR update to ensure consistent state */
__speculation_ctrl_update(~tifn, tifn);
}
}

/*

arch/x86/kernel/process.h (new file, 39 lines)
@@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0
//
// Code shared between 32 and 64 bit

#include <asm/spec-ctrl.h>

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);

/*
* This needs to be inline to optimize for the common case where no extra
* work needs to be done.
*/
static inline void switch_to_extra(struct task_struct *prev,
struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long prev_tif = task_thread_info(prev)->flags;

if (IS_ENABLED(CONFIG_SMP)) {
/*
* Avoid __switch_to_xtra() invocation when conditional
* STIPB is disabled and the only different bit is
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
* in the TIF_WORK_CTXSW masks.
*/
if (!static_branch_likely(&switch_to_cond_stibp)) {
prev_tif &= ~_TIF_SPEC_IB;
next_tif &= ~_TIF_SPEC_IB;
}
}

/*
* __switch_to_xtra() handles debug registers, i/o bitmaps,
* speculation mitigations etc.
*/
if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
prev_tif & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev, next);
}
@@ -59,6 +59,8 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>

#include "process.h"

void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -234,7 +236,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

@@ -266,12 +267,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);

/*
* Now maybe handle debug registers and/or IO bitmaps
*/
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
switch_to_extra(prev_p, next_p);

/*
* Leave lazy mode, flushing any hypercalls made here.

@@ -59,6 +59,8 @@
#include <asm/unistd_32_ia32.h>
#endif

#include "process.h"

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
@@ -400,7 +402,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -467,12 +468,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Reload sp0. */
update_sp0(next_p);

/*
* Now maybe reload the debug registers and handle I/O bitmaps
*/
if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
__switch_to_xtra(prev_p, next_p);

#ifdef CONFIG_XEN_PV
/*

@@ -367,7 +367,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,

/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO);

/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -649,7 +650,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
/*
* The preference is to use SPEC CTRL MSR instead of the
* VIRT_SPEC MSR.
*/
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(VIRT_SSBD);
break;
}

@@ -4734,9 +4734,9 @@ static bool need_remote_flush(u64 old, u64 new)
}

static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
const u8 *new, int *bytes)
int *bytes)
{
u64 gentry;
u64 gentry = 0;
int r;

/*
@@ -4748,22 +4748,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
*gpa &= ~(gpa_t)7;
*bytes = 8;
r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
if (r)
gentry = 0;
new = (const u8 *)&gentry;
}

switch (*bytes) {
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
if (*bytes == 4 || *bytes == 8) {
r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
if (r)
gentry = 0;
}

return gentry;
@@ -4876,8 +4866,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,

pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);

/*
* No need to care whether allocation memory is successful
* or not since pte prefetch is skiped if it does not have
@@ -4886,6 +4874,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_topup_memory_caches(vcpu);

spin_lock(&vcpu->kvm->mmu_lock);

gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);

++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);

@@ -1733,21 +1733,31 @@ out:
return ERR_PTR(err);
}

static void svm_clear_current_vmcb(struct vmcb *vmcb)
{
int i;

for_each_online_cpu(i)
cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
}

static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

/*
* The vmcb page can be recycled, causing a false negative in
* svm_vcpu_load(). So, ensure that no logical CPU has this
* vmcb page recorded as its current vmcb.
*/
svm_clear_current_vmcb(svm->vmcb);

__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
/*
* The vmcb page can be recycled, causing a false negative in
* svm_vcpu_load(). So do a full IBPB now.
*/
indirect_branch_prediction_barrier();
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -3644,7 +3654,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;

msr_info->data = svm->spec_ctrl;
@@ -3749,11 +3760,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;

/* The STIBP bit doesn't fault even if it's not advertised */
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;

svm->spec_ctrl = data;

@@ -6378,6 +6378,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
clock_pairing.nsec = ts.tv_nsec;
clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
clock_pairing.flags = 0;
memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));

ret = 0;
if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -6884,7 +6885,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
else {
if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);

@@ -29,6 +29,12 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/

/*
* Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_ibpb.
*/
#define LAST_USER_MM_IBPB 0x1UL

/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
@@ -180,6 +186,89 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
}
}

static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

return (unsigned long)next->mm | ibpb;
}

static void cond_ibpb(struct task_struct *next)
{
if (!next || !next->mm)
return;

/*
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
* opens a hypothetical hole vs. mm_struct reuse, which is more or
* less impossible to control by an attacker. Aside of that it
* would only affect the first schedule so the theoretically
* exposed data is not really interesting.
*/
if (static_branch_likely(&switch_mm_cond_ibpb)) {
unsigned long prev_mm, next_mm;

/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
*
* 1) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB set to a user space task
* (potential victim) which has TIF_SPEC_IB not set.
*
* 2) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB not set to a user space task
* (potential victim) which has TIF_SPEC_IB set.
*
* This could be done by unconditionally issuing IBPB when
* a task which has TIF_SPEC_IB set is either scheduled in
* or out. Though that results in two flushes when:
*
* - the same user space task is scheduled out and later
* scheduled in again and only a kernel thread ran in
* between.
*
* - a user space task belonging to the same process is
* scheduled in after a kernel thread ran in between
*
* - a user space task belonging to the same process is
* scheduled in immediately.
*
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
* cpu_tlbstate.last_user_mm_ibpb for comparison.
*/
next_mm = mm_mangle_tif_spec_ib(next);
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);

/*
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();

this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}

if (static_branch_unlikely(&switch_mm_always_ibpb)) {
/*
* Only flush when switching to a user space task with a
* different context than the user space task which ran
* last on this CPU.
*/
if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
}
}
}

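A standalone model of the mangling trick documented above, assuming only that mm pointers are at least 2-byte aligned so bit 0 is free (the real code stores the mangled value in cpu_tlbstate.last_user_mm_ibpb):

#include <stdbool.h>
#include <stdio.h>

#define LAST_USER_MM_IBPB 0x1UL   /* bit 0 of the stored mm "pointer" */

/* Tag a (fake) mm pointer with the task's IBPB request. */
static unsigned long mangle(unsigned long mm, bool wants_ibpb)
{
	return mm | (wants_ibpb ? LAST_USER_MM_IBPB : 0);
}

/* IBPB fires only if the mm differs AND at least one side set the bit. */
static bool needs_ibpb(unsigned long prev_mm, unsigned long next_mm)
{
	return next_mm != prev_mm &&
	       ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
	unsigned long a = 0x1000, b = 0x2000;  /* fake mm addresses */

	printf("%d\n", needs_ibpb(mangle(a, true),  mangle(b, false))); /* 1 */
	printf("%d\n", needs_ibpb(mangle(a, true),  mangle(a, true)));  /* 0: same mm */
	printf("%d\n", needs_ibpb(mangle(a, false), mangle(b, false))); /* 0: nobody asked */
	return 0;
}

Because the flag travels inside the compared value, a task toggling TIF_SPEC_IB also makes the comparison miss once, which is exactly the transition that needs a barrier.
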
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -248,27 +337,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
} else {
u16 new_asid;
bool need_flush;
u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
*
* As an optimization, flush indirect branches only when
* switching into processes that disable dumping. This
* protects high value processes like gpg, without having
* too high performance overhead. IBPB is *expensive*!
*
* This will not flush branches when switching into kernel
* threads. It will also not flush if we switch to idle
* thread and back to the same process. It will flush if we
* switch to a different non-dumpable process.
*/
if (tsk && tsk->mm &&
tsk->mm->context.ctx_id != last_ctx_id &&
get_dumpable(tsk->mm) != SUID_DUMP_USER)
indirect_branch_prediction_barrier();
cond_ibpb(tsk);

if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -318,14 +393,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}

/*
* Record last user mm's context id, so we can avoid
* flushing branch buffer with IBPB if we switch back
* to the same user.
*/
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);

/* Make sure we write CR3 before loaded_mm. */
barrier();

@@ -406,7 +473,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));

/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);

@@ -91,14 +91,14 @@ int main(void)
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
#endif
DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));

@@ -88,18 +88,21 @@ void coprocessor_release_all(struct thread_info *ti)

void coprocessor_flush_all(struct thread_info *ti)
{
unsigned long cpenable;
unsigned long cpenable, old_cpenable;
int i;

preempt_disable();

RSR_CPENABLE(old_cpenable);
cpenable = ti->cpenable;
WSR_CPENABLE(cpenable);

for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
}
WSR_CPENABLE(old_cpenable);

preempt_enable();
}

@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
}

#if XTENSA_HAVE_COPROCESSORS
#define CP_OFFSETS(cp) \
{ \
.elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
.ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
.sz = sizeof(xtregs_ ## cp ## _t), \
}

static const struct {
size_t elf_xtregs_offset;
size_t ti_offset;
size_t sz;
} cp_offsets[] = {
CP_OFFSETS(cp0),
CP_OFFSETS(cp1),
CP_OFFSETS(cp2),
CP_OFFSETS(cp3),
CP_OFFSETS(cp4),
CP_OFFSETS(cp5),
CP_OFFSETS(cp6),
CP_OFFSETS(cp7),
};
#endif

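A compilable sketch of the table-driven copy this CP_OFFSETS table enables: per-coprocessor descriptors replace a single memcpy whose source and destination layouts no longer match. All struct names below are invented for illustration:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Invented layouts standing in for thread_info and elf_xtregs_t. */
struct regfile  { char cp0[8]; char cp1[16]; };
struct elf_blob { char header[4]; char cp0[8]; char cp1[16]; };

#define CP_DESC(f) { offsetof(struct elf_blob, f), \
		     offsetof(struct regfile, f),  \
		     sizeof(((struct regfile *)0)->f) }

static const struct { size_t elf_off, src_off, sz; } cp_desc[] = {
	CP_DESC(cp0), CP_DESC(cp1),
};

int main(void)
{
	struct regfile src = { "cp0data", "cp1data" };
	struct elf_blob dst = { {0} };
	size_t i;

	/* Copy each region to its own destination offset. */
	for (i = 0; i < sizeof(cp_desc) / sizeof(cp_desc[0]); i++)
		memcpy((char *)&dst + cp_desc[i].elf_off,
		       (char *)&src + cp_desc[i].src_off, cp_desc[i].sz);
	printf("%s %s\n", dst.cp0, dst.cp1);
	return 0;
}
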
static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
{
struct pt_regs *regs = task_pt_regs(child);
struct thread_info *ti = task_thread_info(child);
elf_xtregs_t __user *xtregs = uregs;
int ret = 0;
int i __maybe_unused;

if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessor registers to memory. */
coprocessor_flush_all(ti);
ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
sizeof(xtregs_coprocessor_t));

for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
ret |= __copy_to_user((char __user *)xtregs +
cp_offsets[i].elf_xtregs_offset,
(const char *)ti +
cp_offsets[i].ti_offset,
cp_offsets[i].sz);
#endif
ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
struct pt_regs *regs = task_pt_regs(child);
elf_xtregs_t *xtregs = uregs;
int ret = 0;
int i __maybe_unused;

if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
coprocessor_flush_all(ti);
coprocessor_release_all(ti);

ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
sizeof(xtregs_coprocessor_t));
for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
(const char __user *)xtregs +
cp_offsets[i].elf_xtregs_offset,
cp_offsets[i].sz);
#endif
ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
sizeof(xtregs->opt));

@@ -3136,7 +3136,6 @@ static void binder_transaction(struct binder_proc *proc,
t->buffer = NULL;
goto err_binder_alloc_buf_failed;
}
t->buffer->allow_user_free = 0;
t->buffer->debug_id = t->debug_id;
t->buffer->transaction = t;
t->buffer->target_node = target_node;
@@ -3632,14 +3631,18 @@ static int binder_thread_write(struct binder_proc *proc,

buffer = binder_alloc_prepare_to_free(&proc->alloc,
data_ptr);
if (buffer == NULL) {
binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
proc->pid, thread->pid, (u64)data_ptr);
break;
}
if (!buffer->allow_user_free) {
binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
proc->pid, thread->pid, (u64)data_ptr);
if (IS_ERR_OR_NULL(buffer)) {
if (PTR_ERR(buffer) == -EPERM) {
binder_user_error(
"%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
proc->pid, thread->pid,
(u64)data_ptr);
} else {
binder_user_error(
"%d:%d BC_FREE_BUFFER u%016llx no match\n",
proc->pid, thread->pid,
(u64)data_ptr);
}
break;
}
binder_debug(BINDER_DEBUG_FREE_BUFFER,

@@ -149,14 +149,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
else {
/*
* Guard against user threads attempting to
* free the buffer twice
* free the buffer when in use by kernel or
* after it's already been freed.
*/
if (buffer->free_in_progress) {
pr_err("%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
alloc->pid, current->pid, (u64)user_ptr);
return NULL;
}
buffer->free_in_progress = 1;
if (!buffer->allow_user_free)
return ERR_PTR(-EPERM);
buffer->allow_user_free = 0;
return buffer;
}
}
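The switch from a NULL return plus a separate flag check to ERR_PTR() lets one return value distinguish "no such buffer" from "buffer exists but may not be freed now". A user-space model of that convention, assuming the usual kernel encoding of small negative errnos in the pointer's top range:

#include <stdbool.h>
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline bool IS_ERR_OR_NULL(const void *ptr)
{
	return !ptr || (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Model of the lookup: three outcomes folded into one pointer. */
static void *lookup(bool found, bool allow_user_free)
{
	if (!found)
		return NULL;              /* no such buffer */
	if (!allow_user_free)
		return ERR_PTR(-EPERM);   /* busy or already being freed */
	return (void *)0x1000;            /* fake buffer address */
}

int main(void)
{
	void *b = lookup(true, false);

	if (IS_ERR_OR_NULL(b))
		printf("%s\n", PTR_ERR(b) == -EPERM ?
		       "unreturned/freeing" : "no match");
	return 0;
}

Since PTR_ERR(NULL) is 0, the else branch of the caller's check naturally covers the "no match" case without a separate NULL test.
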
@@ -490,7 +488,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(

rb_erase(best_fit, &alloc->free_buffers);
buffer->free = 0;
buffer->free_in_progress = 0;
buffer->allow_user_free = 0;
binder_insert_allocated_buffer_locked(alloc, buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
"%d: binder_alloc_buf size %zd got %pK\n",

@@ -50,8 +50,7 @@ struct binder_buffer {
unsigned free:1;
unsigned allow_user_free:1;
unsigned async_transaction:1;
unsigned free_in_progress:1;
unsigned debug_id:28;
unsigned debug_id:29;

struct binder_transaction *transaction;

@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
atchan->descs_allocated = 0;
atchan->status = 0;

/*
* Free atslave allocated in at_dma_xlate()
*/
kfree(chan->private);
chan->private = NULL;

dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
}

@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);

atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
if (!atslave)
return NULL;

@@ -2000,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
struct resource *io;

at_dma_off(atdma);
if (pdev->dev.of_node)
of_dma_controller_free(pdev->dev.of_node);
dma_async_device_unregister(&atdma->dma_common);

dma_pool_destroy(atdma->memset_pool);

@@ -454,6 +454,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
}
wait_for_completion(&msginfo->waitevent);

if (msginfo->response.gpadl_created.creation_status != 0) {
pr_err("Failed to establish GPADL: err = 0x%x\n",
msginfo->response.gpadl_created.creation_status);

ret = -EDQUOT;
goto cleanup;
}

if (channel->rescind) {
ret = -ENODEV;
goto cleanup;

@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
return st_sensors_set_dataready_irq(indio_dev, state);
}

static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
{
return st_sensors_set_enable(indio_dev, true);
}

static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
{
int err;
@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
if (err < 0)
goto st_magn_buffer_postenable_error;

return err;
return st_sensors_set_enable(indio_dev, true);

st_magn_buffer_postenable_error:
kfree(mdata->buffer_data);
@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
int err;
struct st_sensor_data *mdata = iio_priv(indio_dev);

err = iio_triggered_buffer_predisable(indio_dev);
err = st_sensors_set_enable(indio_dev, false);
if (err < 0)
goto st_magn_buffer_predisable_error;

err = st_sensors_set_enable(indio_dev, false);
err = iio_triggered_buffer_predisable(indio_dev);

st_magn_buffer_predisable_error:
kfree(mdata->buffer_data);
@@ -75,7 +70,6 @@ st_magn_buffer_predisable_error:
}

static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
.preenable = &st_magn_buffer_preenable,
.postenable = &st_magn_buffer_postenable,
.predisable = &st_magn_buffer_predisable,
};

@@ -2105,6 +2105,8 @@ static int em28xx_dvb_fini(struct em28xx *dev)
}
}

em28xx_unregister_dvb(dvb);

/* remove I2C SEC */
client = dvb->i2c_client_sec;
if (client) {
@@ -2126,7 +2128,6 @@ static int em28xx_dvb_fini(struct em28xx *dev)
i2c_unregister_device(client);
}

em28xx_unregister_dvb(dvb);
kfree(dvb);
dev->dvb = NULL;
kref_put(&dev->ref, em28xx_free_device);

@@ -417,7 +417,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
if (err)
goto error_window;
err = scif_map_page(&window->num_pages_lookup.lookup[j],
vmalloc_dma_phys ?
vmalloc_num_pages ?
vmalloc_to_page(&window->num_pages[i]) :
virt_to_page(&window->num_pages[i]),
remote_dev);

@@ -578,6 +578,16 @@ static int init_volumes(struct ubi_device *ubi,
vol->ubi = ubi;
reserved_pebs += vol->reserved_pebs;

/*
* We use ubi->peb_count and not vol->reserved_pebs because
* we want to keep the code simple. Otherwise we'd have to
* resize/check the bitmap upon volume resize too.
* Allocating a few bytes more does not hurt.
*/
err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
if (err)
return err;

/*
* In case of dynamic volume UBI knows nothing about how many
* data is stored there. So assume the whole volume is used.
@@ -620,16 +630,6 @@ static int init_volumes(struct ubi_device *ubi,
(long long)(vol->used_ebs - 1) * vol->usable_leb_size;
vol->used_bytes += av->last_data_size;
vol->last_eb_bytes = av->last_data_size;

/*
* We use ubi->peb_count and not vol->reserved_pebs because
* we want to keep the code simple. Otherwise we'd have to
* resize/check the bitmap upon volume resize too.
* Allocating a few bytes more does not hurt.
*/
err = ubi_fastmap_init_checkmap(vol, ubi->peb_count);
if (err)
return err;
}

/* And add the layout volume */

@@ -1691,6 +1691,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
bool if_up = netif_running(nic->netdev);
struct bpf_prog *old_prog;
bool bpf_attached = false;
int ret = 0;

/* For now just support only the usual MTU sized frames */
if (prog && (dev->mtu > 1500)) {
@@ -1724,8 +1725,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
if (nic->xdp_prog) {
/* Attach BPF program */
nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
if (!IS_ERR(nic->xdp_prog))
if (!IS_ERR(nic->xdp_prog)) {
bpf_attached = true;
} else {
ret = PTR_ERR(nic->xdp_prog);
nic->xdp_prog = NULL;
}
}

/* Calculate Tx queues needed for XDP and network stack */
@@ -1737,7 +1742,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
netif_trans_update(nic->netdev);
}

return 0;
return ret;
}

static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)

@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
if (!sq->dmem.base)
return;

if (sq->tso_hdrs)
if (sq->tso_hdrs) {
dma_free_coherent(&nic->pdev->dev,
sq->dmem.q_len * TSO_HEADER_SIZE,
sq->tso_hdrs, sq->tso_hdrs_phys);
sq->tso_hdrs = NULL;
}

/* Free pending skbs in the queue */
smp_rmb();

@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
* it just report sending a packet to the target
* (without actual packet transfer).
*/
dev_kfree_skb_any(skb);
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += skb->len;
dev_kfree_skb_any(skb);
}
}

@@ -140,7 +140,6 @@ struct ipheth_device {
struct usb_device *udev;
struct usb_interface *intf;
struct net_device *net;
struct sk_buff *tx_skb;
struct urb *tx_urb;
struct urb *rx_urb;
unsigned char *tx_buf;
@@ -229,6 +228,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
case -EPROTO:
return;
case 0:
break;
@@ -280,7 +280,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
dev_err(&dev->intf->dev, "%s: urb status: %d\n",
__func__, status);

dev_kfree_skb_irq(dev->tx_skb);
netif_wake_queue(dev->net);
}

@@ -410,7 +409,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
if (skb->len > IPHETH_BUF_SIZE) {
WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
dev->net->stats.tx_dropped++;
dev_kfree_skb_irq(skb);
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

@@ -430,12 +429,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
__func__, retval);
dev->net->stats.tx_errors++;
dev_kfree_skb_irq(skb);
dev_kfree_skb_any(skb);
} else {
dev->tx_skb = skb;

dev->net->stats.tx_packets++;
dev->net->stats.tx_bytes += skb->len;
dev_consume_skb_any(skb);
netif_stop_queue(net);
}

@@ -61,7 +61,8 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN,
VIRTIO_NET_F_GUEST_UFO
VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM
};

struct virtnet_stats {
@@ -1939,9 +1940,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
if (!vi->guest_offloads)
return 0;

if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;

return virtnet_set_guest_offloads(vi, offloads);
}

@@ -1951,8 +1949,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)

if (!vi->guest_offloads)
return 0;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;

return virtnet_set_guest_offloads(vi, offloads);
}
@@ -1970,8 +1966,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
&& (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
return -EOPNOTSUPP;
}

@@ -1380,8 +1380,14 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
};
int rc;
u16 len = sizeof(struct wmi_set_appie_cmd) + ie_len;
struct wmi_set_appie_cmd *cmd = kzalloc(len, GFP_KERNEL);
struct wmi_set_appie_cmd *cmd;

if (len < ie_len) {
rc = -EINVAL;
goto out;
}

cmd = kzalloc(len, GFP_KERNEL);
if (!cmd) {
rc = -ENOMEM;
goto out;

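The new guard catches unsigned wraparound: len is a u16, so a large attacker-supplied ie_len can make sizeof(cmd) + ie_len wrap past zero, and the kzalloc() would then be smaller than the later copy. A tiny model of the check (the header struct is a stand-in):

#include <stdint.h>
#include <stdio.h>

struct hdr { uint8_t type; uint8_t pad; };    /* stand-in for the cmd header */

int main(void)
{
	uint16_t ie_len = 0xFFFF;                   /* attacker-controlled */
	uint16_t len = sizeof(struct hdr) + ie_len; /* wraps to 1 */

	/* sum < addend is the classic unsigned-overflow test */
	if (len < ie_len)
		printf("rejected: u16 wrap (len=%u < ie_len=%u)\n", len, ie_len);
	return 0;
}
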
@@ -35,7 +35,6 @@
|
||||
#include "wl12xx_80211.h"
|
||||
#include "cmd.h"
|
||||
#include "event.h"
|
||||
#include "ps.h"
|
||||
#include "tx.h"
|
||||
#include "hw_ops.h"
|
||||
|
||||
@@ -192,10 +191,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
|
||||
|
||||
timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT);
|
||||
|
||||
ret = wl1271_ps_elp_wakeup(wl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
if (time_after(jiffies, timeout_time)) {
|
||||
wl1271_debug(DEBUG_CMD, "timeout waiting for event %d",
|
||||
@@ -227,7 +222,6 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
|
||||
} while (!event);
|
||||
|
||||
out:
|
||||
wl1271_ps_elp_sleep(wl);
|
||||
kfree(events_vector);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -89,7 +89,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < PCIE_IATU_NUM; i++)
|
||||
dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
|
||||
dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
|
||||
}
|
||||
|
||||
static int ls1021_pcie_link_up(struct dw_pcie *pci)
|
||||
|
||||
@@ -4545,8 +4545,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
|
||||
{
|
||||
struct qeth_ipa_cmd *cmd;
|
||||
struct qeth_arp_query_info *qinfo;
|
||||
struct qeth_snmp_cmd *snmp;
|
||||
unsigned char *data;
|
||||
void *snmp_data;
|
||||
__u16 data_len;
|
||||
|
||||
QETH_CARD_TEXT(card, 3, "snpcmdcb");
|
||||
@@ -4554,7 +4554,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
|
||||
cmd = (struct qeth_ipa_cmd *) sdata;
|
||||
data = (unsigned char *)((char *)cmd - reply->offset);
|
||||
qinfo = (struct qeth_arp_query_info *) reply->param;
|
||||
snmp = &cmd->data.setadapterparms.data.snmp;
|
||||
|
||||
if (cmd->hdr.return_code) {
|
||||
QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
|
||||
@@ -4567,10 +4566,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
|
||||
return 0;
|
||||
}
|
||||
data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
|
||||
if (cmd->data.setadapterparms.hdr.seq_no == 1)
|
||||
data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
|
||||
else
|
||||
data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
|
||||
if (cmd->data.setadapterparms.hdr.seq_no == 1) {
|
||||
snmp_data = &cmd->data.setadapterparms.data.snmp;
|
||||
data_len -= offsetof(struct qeth_ipa_cmd,
|
||||
data.setadapterparms.data.snmp);
|
||||
} else {
|
||||
snmp_data = &cmd->data.setadapterparms.data.snmp.request;
|
||||
data_len -= offsetof(struct qeth_ipa_cmd,
|
||||
data.setadapterparms.data.snmp.request);
|
||||
}
|
||||
|
||||
/* check if there is enough room in userspace */
|
||||
if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
|
||||
@@ -4583,16 +4587,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
|
||||
QETH_CARD_TEXT_(card, 4, "sseqn%i",
|
||||
cmd->data.setadapterparms.hdr.seq_no);
|
||||
/*copy entries to user buffer*/
|
||||
if (cmd->data.setadapterparms.hdr.seq_no == 1) {
|
||||
memcpy(qinfo->udata + qinfo->udata_offset,
|
||||
(char *)snmp,
|
||||
data_len + offsetof(struct qeth_snmp_cmd, data));
|
||||
qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
|
||||
} else {
|
||||
memcpy(qinfo->udata + qinfo->udata_offset,
|
||||
(char *)&snmp->request, data_len);
|
||||
}
|
||||
memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
|
||||
qinfo->udata_offset += data_len;
|
||||
|
||||
/* check if all replies received ... */
|
||||
QETH_CARD_TEXT_(card, 4, "srtot%i",
|
||||
cmd->data.setadapterparms.hdr.used_total);
|
||||
|
||||
@@ -1293,7 +1293,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
|
||||
|
||||
sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
|
||||
sinfo->tx_packets = psta->sta_stats.tx_pkts;
|
||||
|
||||
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
|
||||
}
|
||||
|
||||
/* for Ad-Hoc/AP mode */
|
||||
|
||||
@@ -1461,6 +1461,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
|
||||
struct vchiq_await_completion32 args32;
|
||||
struct vchiq_completion_data32 completion32;
|
||||
unsigned int *msgbufcount32;
|
||||
unsigned int msgbufcount_native;
|
||||
compat_uptr_t msgbuf32;
|
||||
void *msgbuf;
|
||||
void **msgbufptr;
|
||||
@@ -1572,7 +1573,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
|
||||
sizeof(completion32)))
|
||||
return -EFAULT;
|
||||
|
||||
args32.msgbufcount--;
|
||||
if (get_user(msgbufcount_native, &args->msgbufcount))
|
||||
return -EFAULT;
|
||||
|
||||
if (!msgbufcount_native)
|
||||
args32.msgbufcount--;
|
||||
|
||||
msgbufcount32 =
|
||||
&((struct vchiq_await_completion32 __user *)arg)->msgbufcount;
|
||||
|
||||
@@ -64,6 +64,9 @@ static const struct usb_device_id usb_quirk_list[] = {
|
||||
/* Microsoft LifeCam-VX700 v2.0 */
|
||||
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
|
||||
|
||||
/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
|
||||
{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
|
||||
|
||||
/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
|
||||
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
|
||||
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
|
||||
|
||||
@@ -1511,9 +1511,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
|
||||
unsigned transfer_in_flight;
|
||||
unsigned started;
|
||||
|
||||
if (dep->flags & DWC3_EP_STALL)
|
||||
return 0;
|
||||
|
||||
if (dep->number > 1)
|
||||
trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
|
||||
else
|
||||
@@ -1535,8 +1532,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
|
||||
else
|
||||
dep->flags |= DWC3_EP_STALL;
|
||||
} else {
|
||||
if (!(dep->flags & DWC3_EP_STALL))
|
||||
return 0;
|
||||
|
||||
ret = dwc3_send_clear_stall_ep_cmd(dep);
|
||||
if (ret)
|
||||
|
||||
@@ -39,4 +39,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
|
||||
"USB Card Reader",
|
||||
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
|
||||
|
||||
UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
|
||||
"Realtek",
|
||||
"USB Card Reader",
|
||||
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
|
||||
|
||||
UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
|
||||
"Realtek",
|
||||
"USB Card Reader",
|
||||
USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
|
||||
|
||||
#endif /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */
|
||||
|
||||
@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
|
||||
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
|
||||
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
|
||||
uuid-tree.o props.o hash.o free-space-tree.o
|
||||
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
#include "sysfs.h"
|
||||
#include "qgroup.h"
|
||||
#include "compression.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/cpufeature.h>
|
||||
@@ -544,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define CORRUPT(reason, eb, root, slot) \
|
||||
btrfs_crit(root->fs_info, \
|
||||
"corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
|
||||
btrfs_header_level(eb) == 0 ? "leaf" : "node", \
|
||||
reason, btrfs_header_bytenr(eb), root->objectid, slot)
|
||||
|
||||
static noinline int check_leaf(struct btrfs_root *root,
|
||||
struct extent_buffer *leaf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key leaf_key;
|
||||
u32 nritems = btrfs_header_nritems(leaf);
|
||||
int slot;
|
||||
|
||||
/*
|
||||
* Extent buffers from a relocation tree have a owner field that
|
||||
* corresponds to the subvolume tree they are based on. So just from an
|
||||
* extent buffer alone we can not find out what is the id of the
|
||||
* corresponding subvolume tree, so we can not figure out if the extent
|
||||
* buffer corresponds to the root of the relocation tree or not. So skip
|
||||
* this check for relocation trees.
|
||||
*/
|
||||
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
|
||||
struct btrfs_root *check_root;
|
||||
|
||||
key.objectid = btrfs_header_owner(leaf);
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
check_root = btrfs_get_fs_root(fs_info, &key, false);
|
||||
/*
|
||||
* The only reason we also check NULL here is that during
|
||||
* open_ctree() some roots has not yet been set up.
|
||||
*/
|
||||
if (!IS_ERR_OR_NULL(check_root)) {
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = btrfs_root_node(check_root);
|
||||
/* if leaf is the root, then it's fine */
|
||||
if (leaf != eb) {
|
||||
CORRUPT("non-root leaf's nritems is 0",
|
||||
leaf, check_root, 0);
|
||||
free_extent_buffer(eb);
|
||||
return -EIO;
|
||||
}
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (nritems == 0)
|
||||
return 0;
|
||||
|
||||
/* Check the 0 item */
|
||||
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
||||
CORRUPT("invalid item offset size pair", leaf, root, 0);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure each items keys are in the correct order and their
|
||||
* offsets make sense. We only have to loop through nritems-1 because
|
||||
* we check the current slot against the next slot, which verifies the
|
||||
* next slot's offset+size makes sense and that the current's slot
|
||||
* offset is correct.
|
||||
*/
|
||||
for (slot = 0; slot < nritems - 1; slot++) {
|
||||
btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot + 1);
|
||||
|
||||
/* Make sure the keys are in the right order */
|
||||
if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
|
||||
CORRUPT("bad key order", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the offset and ends are right, remember that the
|
||||
* item data starts at the end of the leaf and grows towards the
|
||||
* front.
|
||||
*/
|
||||
if (btrfs_item_offset_nr(leaf, slot) !=
|
||||
btrfs_item_end_nr(leaf, slot + 1)) {
|
||||
CORRUPT("slot offset bad", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure that we don't point outside of the leaf,
|
||||
* just in case all the items are consistent to each other, but
|
||||
* all point outside of the leaf.
|
||||
*/
|
||||
if (btrfs_item_end_nr(leaf, slot) >
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
||||
CORRUPT("slot end outside of leaf", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_node(struct btrfs_root *root, struct extent_buffer *node)
|
||||
{
|
||||
unsigned long nr = btrfs_header_nritems(node);
|
||||
struct btrfs_key key, next_key;
|
||||
int slot;
|
||||
u64 bytenr;
|
||||
int ret = 0;
|
||||
|
||||
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
|
||||
btrfs_crit(root->fs_info,
|
||||
"corrupt node: block %llu root %llu nritems %lu",
|
||||
node->start, root->objectid, nr);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
for (slot = 0; slot < nr - 1; slot++) {
|
||||
bytenr = btrfs_node_blockptr(node, slot);
|
||||
btrfs_node_key_to_cpu(node, &key, slot);
|
||||
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
|
||||
|
||||
if (!bytenr) {
|
||||
CORRUPT("invalid item slot", node, root, slot);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
|
||||
CORRUPT("bad key order", node, root, slot);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
u64 phy_offset, struct page *page,
|
||||
u64 start, u64 end, int mirror)
|
||||
@@ -749,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
* that we don't try and read the other copies of this block, just
|
||||
* return -EIO.
|
||||
*/
|
||||
if (found_level == 0 && check_leaf(root, eb)) {
|
||||
if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
|
||||
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
if (found_level > 0 && check_node(root, eb))
|
||||
if (found_level > 0 && btrfs_check_node(root, eb))
|
||||
ret = -EIO;
|
||||
|
||||
if (!ret)
|
||||
@@ -4009,7 +3870,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
|
||||
buf->len,
|
||||
fs_info->dirty_metadata_batch);
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
|
||||
/*
|
||||
* Since btrfs_mark_buffer_dirty() can be called with item pointer set
|
||||
* but item data not updated.
|
||||
* So here we should only check item pointers, not item data.
|
||||
*/
|
||||
if (btrfs_header_level(buf) == 0 &&
|
||||
btrfs_check_leaf_relaxed(root, buf)) {
|
||||
btrfs_print_leaf(buf);
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
@@ -9828,6 +9828,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
|
||||
int ret = 0;
|
||||
struct btrfs_key found_key;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_block_group_item bg;
|
||||
u64 flags;
|
||||
int slot;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
|
||||
@@ -9862,8 +9864,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
	"logical %llu len %llu found bg but no related chunk",
				  found_key.objectid, found_key.offset);
			ret = -ENOENT;
+		} else if (em->start != found_key.objectid ||
+			   em->len != found_key.offset) {
+			btrfs_err(fs_info,
+	"block group %llu len %llu mismatch with chunk %llu len %llu",
+				  found_key.objectid, found_key.offset,
+				  em->start, em->len);
+			ret = -EUCLEAN;
		} else {
-			ret = 0;
+			read_extent_buffer(leaf, &bg,
+					   btrfs_item_ptr_offset(leaf, slot),
+					   sizeof(bg));
+			flags = btrfs_block_group_flags(&bg) &
+				BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+			if (flags != (em->map_lookup->type &
+				      BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+				btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+					  found_key.objectid,
+					  found_key.offset, flags,
+					  (BTRFS_BLOCK_GROUP_TYPE_MASK &
+					   em->map_lookup->type));
+				ret = -EUCLEAN;
+			} else {
+				ret = 0;
+			}
		}
		free_extent_map(em);
		goto out;
@@ -10092,6 +10118,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
	return cache;
}

+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct extent_map *em;
+	struct btrfs_block_group_cache *bg;
+	u64 start = 0;
+	int ret = 0;
+
+	while (1) {
+		read_lock(&map_tree->map_tree.lock);
+		/*
+		 * lookup_extent_mapping will return the first extent map
+		 * intersecting the range, so setting @len to 1 is enough to
+		 * get the first chunk.
+		 */
+		em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+		read_unlock(&map_tree->map_tree.lock);
+		if (!em)
+			break;
+
+		bg = btrfs_lookup_block_group(fs_info, em->start);
+		if (!bg) {
+			btrfs_err(fs_info,
+	"chunk start=%llu len=%llu doesn't have corresponding block group",
+				  em->start, em->len);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			break;
+		}
+		if (bg->key.objectid != em->start ||
+		    bg->key.offset != em->len ||
+		    (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+		    (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+			btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+				  em->start, em->len,
+				  em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+				  bg->key.objectid, bg->key.offset,
+				  bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			btrfs_put_block_group(bg);
+			break;
+		}
+		start = em->start + em->len;
+		free_extent_map(em);
+		btrfs_put_block_group(bg);
+	}
+	return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_path *path;
@@ -10264,7 +10346,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
	}

	init_global_block_rsv(info);
-	ret = 0;
+	ret = check_chunk_block_group_mappings(info);
error:
	btrfs_free_path(path);
	return ret;

@@ -4048,6 +4048,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
restart:
	if (update_backref_cache(trans, &rc->backref_cache)) {
		btrfs_end_transaction(trans);
+		trans = NULL;
		continue;
	}

@@ -2176,6 +2176,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
	vol = memdup_user((void __user *)arg, sizeof(*vol));
	if (IS_ERR(vol))
		return PTR_ERR(vol);
+	vol->name[BTRFS_PATH_NAME_MAX] = '\0';

	switch (cmd) {
	case BTRFS_IOC_SCAN_DEV:

@@ -1955,6 +1955,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
		return ret;
	}

+	btrfs_trans_release_metadata(trans, fs_info);
+	trans->block_rsv = NULL;
+
	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
@@ -1964,9 +1967,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
		return ret;
	}

-	btrfs_trans_release_metadata(trans, fs_info);
-	trans->block_rsv = NULL;
-
	cur_trans = trans->transaction;

	/*
fs/btrfs/tree-checker.c (new file, 649 lines)
@@ -0,0 +1,649 @@
/*
 * Copyright (C) Qu Wenruo 2017.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program.
 */

/*
 * The module is used to catch unexpected/corrupted tree block data.
 * Such behavior can be caused either by a fuzzed image or bugs.
 *
 * The objective is to do leaf/node validation checks when tree block is read
 * from disk, and check *every* possible member, so other code won't
 * need to check them again.
 *
 * Due to the potential and unwanted damage, every checker needs to be
 * carefully reviewed, otherwise it could prevent the mount of valid images.
 */

#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
#include "hash.h"
#include "volumes.h"

#define CORRUPT(reason, eb, root, slot)					\
	btrfs_crit(root->fs_info,					\
		   "corrupt %s, %s: block=%llu, root=%llu, slot=%d",	\
		   btrfs_header_level(eb) == 0 ? "leaf" : "node",	\
		   reason, btrfs_header_bytenr(eb), root->objectid, slot)
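
/*
 * For reference, CORRUPT("bad key order", leaf, root, slot) expands to a
 * message of the form (values illustrative only):
 *   corrupt leaf, bad key order: block=29360128, root=5, slot=3
 */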

/*
 * Error message should follow the following format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
 *		It's recommended to decode key.objectid/offset if it's
 *		meaningful.
 * @reason:	describe the error
 * @bad_value:	optional, it's recommended to output the bad value and its
 *		expected value (range).
 *
 * Since comma is used to separate the components, only space is allowed
 * inside each component.
 */
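
/*
 * A message following this format would look like (hypothetical values):
 *   corrupt leaf: root=5 block=29360128 slot=3, invalid key objectid, have 1 expect 256
 */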

/*
 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
 * Allows callers to customize the output.
 */
__printf(4, 5)
static void generic_err(const struct btrfs_root *root,
			const struct extent_buffer *eb, int slot,
			const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(root->fs_info,
		   "corrupt %s: root=%llu block=%llu slot=%d, %pV",
		   btrfs_header_level(eb) == 0 ? "leaf" : "node",
		   root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
	va_end(args);
}

static int check_extent_data_item(struct btrfs_root *root,
				  struct extent_buffer *leaf,
				  struct btrfs_key *key, int slot)
{
	struct btrfs_file_extent_item *fi;
	u32 sectorsize = root->fs_info->sectorsize;
	u32 item_size = btrfs_item_size_nr(leaf, slot);

	if (!IS_ALIGNED(key->offset, sectorsize)) {
		CORRUPT("unaligned key offset for file extent",
			leaf, root, slot);
		return -EUCLEAN;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
		CORRUPT("invalid file extent type", leaf, root, slot);
		return -EUCLEAN;
	}

	/*
	 * Support for new compression/encryption must introduce an incompat
	 * flag, and must be caught in open_ctree().
	 */
	if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
		CORRUPT("invalid file extent compression", leaf, root, slot);
		return -EUCLEAN;
	}
	if (btrfs_file_extent_encryption(leaf, fi)) {
		CORRUPT("invalid file extent encryption", leaf, root, slot);
		return -EUCLEAN;
	}
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		/* Inline extent must have 0 as key offset */
		if (key->offset) {
			CORRUPT("inline extent has non-zero key offset",
				leaf, root, slot);
			return -EUCLEAN;
		}

		/* Compressed inline extent has no on-disk size, skip it */
		if (btrfs_file_extent_compression(leaf, fi) !=
		    BTRFS_COMPRESS_NONE)
			return 0;

		/* Uncompressed inline extent size must match item size */
		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
		    btrfs_file_extent_ram_bytes(leaf, fi)) {
			CORRUPT("plaintext inline extent has invalid size",
				leaf, root, slot);
			return -EUCLEAN;
		}
		return 0;
	}

	/* Regular or preallocated extent has fixed item size */
	if (item_size != sizeof(*fi)) {
		CORRUPT(
		"regular or preallocated extent data item size is invalid",
			leaf, root, slot);
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
	    !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
	    !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
	    !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
	    !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
		CORRUPT(
		"regular or preallocated extent data item has unaligned value",
			leaf, root, slot);
		return -EUCLEAN;
	}

	return 0;
}

static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
			   struct btrfs_key *key, int slot)
{
	u32 sectorsize = root->fs_info->sectorsize;
	u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);

	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
		CORRUPT("invalid objectid for csum item", leaf, root, slot);
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(key->offset, sectorsize)) {
		CORRUPT("unaligned key offset for csum item", leaf, root, slot);
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
		CORRUPT("unaligned csum item size", leaf, root, slot);
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Customized reporting for dir_item; the only important new info is
 * key->objectid, which represents the inode number.
 */
__printf(4, 5)
static void dir_item_err(const struct btrfs_root *root,
			 const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(root->fs_info,
		   "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
		   btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
		   btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
	va_end(args);
}

static int check_dir_item(struct btrfs_root *root,
			  struct extent_buffer *leaf,
			  struct btrfs_key *key, int slot)
{
	struct btrfs_dir_item *di;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u32 cur = 0;

	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
	while (cur < item_size) {
		u32 name_len;
		u32 data_len;
		u32 max_name_len;
		u32 total_size;
		u32 name_hash;
		u8 dir_type;

		/* header itself should not cross item boundary */
		if (cur + sizeof(*di) > item_size) {
			dir_item_err(root, leaf, slot,
		"dir item header crosses item boundary, have %zu boundary %u",
				cur + sizeof(*di), item_size);
			return -EUCLEAN;
		}

		/* dir type check */
		dir_type = btrfs_dir_type(leaf, di);
		if (dir_type >= BTRFS_FT_MAX) {
			dir_item_err(root, leaf, slot,
			"invalid dir item type, have %u expect [0, %u)",
				dir_type, BTRFS_FT_MAX);
			return -EUCLEAN;
		}

		if (key->type == BTRFS_XATTR_ITEM_KEY &&
		    dir_type != BTRFS_FT_XATTR) {
			dir_item_err(root, leaf, slot,
		"invalid dir item type for XATTR key, have %u expect %u",
				dir_type, BTRFS_FT_XATTR);
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR &&
		    key->type != BTRFS_XATTR_ITEM_KEY) {
			dir_item_err(root, leaf, slot,
				"xattr dir type found for non-XATTR key");
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR)
			max_name_len = XATTR_NAME_MAX;
		else
			max_name_len = BTRFS_NAME_LEN;

		/* Name/data length check */
		name_len = btrfs_dir_name_len(leaf, di);
		data_len = btrfs_dir_data_len(leaf, di);
		if (name_len > max_name_len) {
			dir_item_err(root, leaf, slot,
				"dir item name len too long, have %u max %u",
				name_len, max_name_len);
			return -EUCLEAN;
		}
		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
			dir_item_err(root, leaf, slot,
			"dir item name and data len too long, have %u max %u",
				name_len + data_len,
				BTRFS_MAX_XATTR_SIZE(root->fs_info));
			return -EUCLEAN;
		}

		if (data_len && dir_type != BTRFS_FT_XATTR) {
			dir_item_err(root, leaf, slot,
			"dir item with invalid data len, have %u expect 0",
				data_len);
			return -EUCLEAN;
		}

		total_size = sizeof(*di) + name_len + data_len;

		/* header and name/data should not cross item boundary */
		if (cur + total_size > item_size) {
			dir_item_err(root, leaf, slot,
		"dir item data crosses item boundary, have %u boundary %u",
				cur + total_size, item_size);
			return -EUCLEAN;
		}

		/*
		 * Special check for XATTR/DIR_ITEM, as key->offset is name
		 * hash, should match its name
		 */
		if (key->type == BTRFS_DIR_ITEM_KEY ||
		    key->type == BTRFS_XATTR_ITEM_KEY) {
			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];

			read_extent_buffer(leaf, namebuf,
					(unsigned long)(di + 1), name_len);
			name_hash = btrfs_name_hash(namebuf, name_len);
			if (key->offset != name_hash) {
				dir_item_err(root, leaf, slot,
		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
					name_hash, key->offset);
				return -EUCLEAN;
			}
		}
		cur += total_size;
		di = (struct btrfs_dir_item *)((void *)di + total_size);
	}
	return 0;
}

__printf(4, 5)
__cold
static void block_group_err(const struct btrfs_fs_info *fs_info,
			    const struct extent_buffer *eb, int slot,
			    const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
		   btrfs_header_level(eb) == 0 ? "leaf" : "node",
		   btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		   key.objectid, key.offset, &vaf);
	va_end(args);
}

static int check_block_group_item(struct btrfs_fs_info *fs_info,
				  struct extent_buffer *leaf,
				  struct btrfs_key *key, int slot)
{
	struct btrfs_block_group_item bgi;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 type;

	/*
	 * Here we don't really care about alignment since the extent
	 * allocator can handle it.  We care more about the size: if one
	 * block group is larger than the maximum size, it must be some
	 * obvious corruption.
	 */
	if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
		block_group_err(fs_info, leaf, slot,
			"invalid block group size, have %llu expect (0, %llu]",
			key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
		return -EUCLEAN;
	}

	if (item_size != sizeof(bgi)) {
		block_group_err(fs_info, leaf, slot,
			"invalid item size, have %u expect %zu",
			item_size, sizeof(bgi));
		return -EUCLEAN;
	}

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
	if (btrfs_block_group_chunk_objectid(&bgi) !=
	    BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
		block_group_err(fs_info, leaf, slot,
		"invalid block group chunk objectid, have %llu expect %llu",
			btrfs_block_group_chunk_objectid(&bgi),
			BTRFS_FIRST_CHUNK_TREE_OBJECTID);
		return -EUCLEAN;
	}

	if (btrfs_block_group_used(&bgi) > key->offset) {
		block_group_err(fs_info, leaf, slot,
			"invalid block group used, have %llu expect [0, %llu)",
			btrfs_block_group_used(&bgi), key->offset);
		return -EUCLEAN;
	}

	flags = btrfs_block_group_flags(&bgi);
	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
		block_group_err(fs_info, leaf, slot,
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
		return -EUCLEAN;
	}

	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	if (type != BTRFS_BLOCK_GROUP_DATA &&
	    type != BTRFS_BLOCK_GROUP_METADATA &&
	    type != BTRFS_BLOCK_GROUP_SYSTEM &&
	    type != (BTRFS_BLOCK_GROUP_METADATA |
		     BTRFS_BLOCK_GROUP_DATA)) {
		block_group_err(fs_info, leaf, slot,
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
			type, hweight64(type),
			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
			BTRFS_BLOCK_GROUP_SYSTEM,
			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Common point to switch the item-specific validation.
 */
static int check_leaf_item(struct btrfs_root *root,
			   struct extent_buffer *leaf,
			   struct btrfs_key *key, int slot)
{
	int ret = 0;

	switch (key->type) {
	case BTRFS_EXTENT_DATA_KEY:
		ret = check_extent_data_item(root, leaf, key, slot);
		break;
	case BTRFS_EXTENT_CSUM_KEY:
		ret = check_csum_item(root, leaf, key, slot);
		break;
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
	case BTRFS_XATTR_ITEM_KEY:
		ret = check_dir_item(root, leaf, key, slot);
		break;
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
		ret = check_block_group_item(root->fs_info, leaf, key, slot);
		break;
	}
	return ret;
}

static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
		      bool check_item_data)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	/* No valid key type is 0, so all keys should be larger than this key */
	struct btrfs_key prev_key = {0, 0, 0};
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(leaf);
	int slot;

	if (btrfs_header_level(leaf) != 0) {
		generic_err(root, leaf, 0,
			"invalid level for leaf, have %d expect 0",
			btrfs_header_level(leaf));
		return -EUCLEAN;
	}

	/*
	 * Extent buffers from a relocation tree have an owner field that
	 * corresponds to the subvolume tree they are based on.  So just from
	 * an extent buffer alone we can not find out what the id of the
	 * corresponding subvolume tree is, so we can not figure out if the
	 * extent buffer corresponds to the root of the relocation tree or
	 * not.  So skip this check for relocation trees.
	 */
	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
		u64 owner = btrfs_header_owner(leaf);
		struct btrfs_root *check_root;

		/* These trees must never be empty */
		if (owner == BTRFS_ROOT_TREE_OBJECTID ||
		    owner == BTRFS_CHUNK_TREE_OBJECTID ||
		    owner == BTRFS_EXTENT_TREE_OBJECTID ||
		    owner == BTRFS_DEV_TREE_OBJECTID ||
		    owner == BTRFS_FS_TREE_OBJECTID ||
		    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
			generic_err(root, leaf, 0,
			"invalid root, root %llu must never be empty",
				    owner);
			return -EUCLEAN;
		}
		key.objectid = owner;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		check_root = btrfs_get_fs_root(fs_info, &key, false);
		/*
		 * The only reason we also check NULL here is that during
		 * open_ctree() some roots have not yet been set up.
		 */
		if (!IS_ERR_OR_NULL(check_root)) {
			struct extent_buffer *eb;

			eb = btrfs_root_node(check_root);
			/* if leaf is the root, then it's fine */
			if (leaf != eb) {
				CORRUPT("non-root leaf's nritems is 0",
					leaf, check_root, 0);
				free_extent_buffer(eb);
				return -EUCLEAN;
			}
			free_extent_buffer(eb);
		}
		return 0;
	}

	if (nritems == 0)
		return 0;

	/*
	 * Check the following things to make sure this is a good leaf, and
	 * leaf users won't need to bother with similar sanity checks:
	 *
	 * 1) key ordering
	 * 2) item offset and size
	 *    No overlap, no hole, all inside the leaf.
	 * 3) item content
	 *    If possible, do comprehensive sanity check.
	 *    NOTE: All checks must only rely on the item data itself.
	 */
	for (slot = 0; slot < nritems; slot++) {
		u32 item_end_expected;
		int ret;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* Make sure the keys are in the right order */
		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
			CORRUPT("bad key order", leaf, root, slot);
			return -EUCLEAN;
		}

		/*
		 * Make sure the offset and ends are right, remember that the
		 * item data starts at the end of the leaf and grows towards
		 * the front.
		 */
		if (slot == 0)
			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
		else
			item_end_expected = btrfs_item_offset_nr(leaf,
								 slot - 1);
		if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
			CORRUPT("slot offset bad", leaf, root, slot);
			return -EUCLEAN;
		}

		/*
		 * Check to make sure that we don't point outside of the leaf,
		 * just in case all the items are consistent to each other,
		 * but all point outside of the leaf.
		 */
		if (btrfs_item_end_nr(leaf, slot) >
		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
			CORRUPT("slot end outside of leaf", leaf, root, slot);
			return -EUCLEAN;
		}

		/* Also check if the item pointer overlaps with btrfs item. */
		if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
		    btrfs_item_ptr_offset(leaf, slot)) {
			CORRUPT("slot overlap with its data", leaf, root, slot);
			return -EUCLEAN;
		}

		if (check_item_data) {
			/*
			 * Check if the item size and content meet other
			 * criteria
			 */
			ret = check_leaf_item(root, leaf, &key, slot);
			if (ret < 0)
				return ret;
		}

		prev_key.objectid = key.objectid;
		prev_key.type = key.type;
		prev_key.offset = key.offset;
	}

	return 0;
}

int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
{
	return check_leaf(root, leaf, true);
}

int btrfs_check_leaf_relaxed(struct btrfs_root *root,
			     struct extent_buffer *leaf)
{
	return check_leaf(root, leaf, false);
}

int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
{
	unsigned long nr = btrfs_header_nritems(node);
	struct btrfs_key key, next_key;
	int slot;
	int level = btrfs_header_level(node);
	u64 bytenr;
	int ret = 0;

	if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
		generic_err(root, node, 0,
			"invalid level for node, have %d expect [1, %d]",
			level, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
		btrfs_crit(root->fs_info,
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
			   root->objectid, node->start,
			   nr == 0 ? "small" : "large", nr,
			   BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
		return -EUCLEAN;
	}

	for (slot = 0; slot < nr - 1; slot++) {
		bytenr = btrfs_node_blockptr(node, slot);
		btrfs_node_key_to_cpu(node, &key, slot);
		btrfs_node_key_to_cpu(node, &next_key, slot + 1);

		if (!bytenr) {
			generic_err(root, node, slot,
				"invalid NULL node pointer");
			ret = -EUCLEAN;
			goto out;
		}
		if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
			generic_err(root, node, slot,
			"unaligned pointer, have %llu should be aligned to %u",
				bytenr, root->fs_info->sectorsize);
			ret = -EUCLEAN;
			goto out;
		}

		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
			generic_err(root, node, slot,
			"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
				key.objectid, key.type, key.offset,
				next_key.objectid, next_key.type,
				next_key.offset);
			ret = -EUCLEAN;
			goto out;
		}
	}
out:
	return ret;
}

fs/btrfs/tree-checker.h (new file, 38 lines)
@@ -0,0 +1,38 @@
/*
 * Copyright (C) Qu Wenruo 2017.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program.
 */

#ifndef __BTRFS_TREE_CHECKER__
#define __BTRFS_TREE_CHECKER__

#include "ctree.h"
#include "extent_io.h"

/*
 * Comprehensive leaf checker.
 * Will check not only the item pointers, but also every possible member
 * in item data.
 */
int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);

/*
 * Less strict leaf checker.
 * Will only check item pointers, not reading item data.
 */
int btrfs_check_leaf_relaxed(struct btrfs_root *root,
			     struct extent_buffer *leaf);
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
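
/*
 * Usage note (mirroring the disk-io.c hunks above): the full leaf check and
 * btrfs_check_node() run from the read-time end_io hook, while the relaxed
 * variant runs from btrfs_mark_buffer_dirty(), where item data may not be
 * updated yet.
 */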

#endif

@@ -4647,7 +4647,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,

	if (type & BTRFS_BLOCK_GROUP_DATA) {
		max_stripe_size = SZ_1G;
-		max_chunk_size = 10 * max_stripe_size;
+		max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
		if (!devs_max)
			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -6353,6 +6353,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
+	u64 features;
+	bool mixed = false;

	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6391,6 +6393,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
			  btrfs_chunk_type(leaf, chunk));
		return -EIO;
	}

+	if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+		btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
+		return -EIO;
+	}
+
+	if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+	    (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+		btrfs_err(fs_info,
+			  "system chunk with data or metadata type: 0x%llx", type);
+		return -EIO;
+	}
+
+	features = btrfs_super_incompat_flags(fs_info->super_copy);
+	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+		mixed = true;
+
+	if (!mixed) {
+		if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+		    (type & BTRFS_BLOCK_GROUP_DATA)) {
+			btrfs_err(fs_info,
+			"mixed chunk type in non-mixed mode: 0x%llx", type);
+			return -EIO;
+		}
+	}
+
	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||

@@ -24,6 +24,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"

+#define BTRFS_MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)
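+/* Matches the former hard-coded 10 * SZ_1G data chunk limit in __btrfs_alloc_chunk() */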
+
extern struct mutex uuid_mutex;

#define BTRFS_STRIPE_LEN	SZ_64K

@@ -4079,6 +4079,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
	return auth;
}

+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
+					challenge_buf, challenge_buf_len);
+}
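
/*
 * Wired into mds_con_ops below; forwards a cephx challenge from the peer to
 * the auth layer (see "libceph: add authorizer challenge" in this series).
 */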

static int verify_authorizer_reply(struct ceph_connection *con)
{
@@ -4142,6 +4152,7 @@ static const struct ceph_connection_operations mds_con_ops = {
	.put = con_put,
	.dispatch = dispatch,
	.get_authorizer = get_authorizer,
+	.add_authorizer_challenge = add_authorizer_challenge,
	.verify_authorizer_reply = verify_authorizer_reply,
	.invalidate_authorizer = invalidate_authorizer,
	.peer_reset = peer_reset,

@@ -304,8 +304,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
	 */
	dio->iocb->ki_pos += transferred;

-	if (dio->op == REQ_OP_WRITE)
-		ret = generic_write_sync(dio->iocb, transferred);
+	if (ret > 0 && dio->op == REQ_OP_WRITE)
+		ret = generic_write_sync(dio->iocb, ret);
	dio->iocb->ki_complete(dio->iocb, ret, 0);
}

@@ -612,9 +612,9 @@ skip_replace:
	}

cleanup:
-	brelse(bh);
	if (!(bh && header == HDR(bh)))
		kfree(header);
+	brelse(bh);
	up_write(&EXT2_I(inode)->xattr_sem);

	return error;

@@ -85,8 +85,10 @@ repeat:
	fio.page = page;

	if (f2fs_submit_page_bio(&fio)) {
-		f2fs_put_page(page, 1);
-		goto repeat;
+		memset(page_address(page), 0, PAGE_SIZE);
+		f2fs_stop_checkpoint(sbi, false);
+		f2fs_bug_on(sbi, 1);
+		return page;
	}

	lock_page(page);
@@ -117,7 +119,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
	return __get_meta_page(sbi, index, false);
}

-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+				block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
@@ -137,8 +140,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
			return false;
		break;
	case META_POR:
+	case DATA_GENERIC:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
-				blkaddr < MAIN_BLKADDR(sbi)))
+				blkaddr < MAIN_BLKADDR(sbi))) {
+			if (type == DATA_GENERIC) {
+				f2fs_msg(sbi->sb, KERN_WARNING,
+					"access invalid blkaddr:%u", blkaddr);
+				WARN_ON(1);
+			}
			return false;
+		}
		break;
+	case META_GENERIC:
+		if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+			blkaddr >= MAIN_BLKADDR(sbi)))
+			return false;
+		break;
	default:
@@ -173,7 +188,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

-		if (!is_valid_blkaddr(sbi, blkno, type))
+		if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
@@ -774,6 +789,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
					&cp_page_1, version);
	if (err)
		return NULL;
+
+	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+					sbi->blocks_per_seg) {
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"invalid cp_pack_total_block_count:%u",
+			le32_to_cpu(cp_block->cp_pack_total_block_count));
+		goto invalid_cp;
+	}
	pre_version = *version;

	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
@@ -837,15 +860,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

+	/* Sanity checking of checkpoint */
+	if (sanity_check_ckpt(sbi))
+		goto free_fail_no_cp;
+
	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

-	/* Sanity checking of checkpoint */
-	if (sanity_check_ckpt(sbi))
-		goto free_fail_no_cp;
-
	if (cp_blks <= 1)
		goto done;

@@ -440,7 +440,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

-	verify_block_addr(fio, fio->new_blkaddr);
+	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

@@ -485,7 +488,7 @@ next:
		spin_unlock(&io->io_lock);
	}

-	if (fio->old_blkaddr != NEW_ADDR)
+	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

@@ -1045,7 +1048,13 @@ next_dnode:
next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

-	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+	if (__is_valid_data_blkaddr(blkaddr) &&
+		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+		err = -EFAULT;
+		goto sync_out;
+	}
+
+	if (!is_valid_data_blkaddr(sbi, blkaddr)) {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
@@ -1495,6 +1504,10 @@ got_it:
			SetPageUptodate(page);
			goto confused;
		}
+
+		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+							DATA_GENERIC))
+			goto set_error_page;
	} else {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
@@ -1680,15 +1693,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
	return should_update_inplace(inode, fio);
}

-static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
-{
-	if (fio->old_blkaddr == NEW_ADDR)
-		return false;
-	if (fio->old_blkaddr == NULL_ADDR)
-		return false;
-	return true;
-}
-
int do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
@@ -1703,11 +1707,13 @@ int do_write_data_page(struct f2fs_io_info *fio)
	    f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

-		if (valid_ipu_blkaddr(fio)) {
-			ipu_force = true;
-			fio->need_lock = LOCK_DONE;
-			goto got_it;
-		}
+		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+							DATA_GENERIC))
+			return -EFAULT;
+
+		ipu_force = true;
+		fio->need_lock = LOCK_DONE;
+		goto got_it;
	}

	/* Avoid deadlock between page->lock and f2fs_lock_op */
@@ -1726,11 +1732,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
		goto out_writepage;
	}
got_it:
+	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+							DATA_GENERIC)) {
+		err = -EFAULT;
+		goto out_writepage;
+	}
	/*
	 * If the current allocation needs SSR, it is better to do in-place
	 * writes for the updated data.
	 */
-	if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

@@ -193,7 +193,7 @@ struct cp_control {
};

/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
 */
enum {
	META_CP,
@@ -201,6 +201,8 @@ enum {
	META_SIT,
	META_SSA,
	META_POR,
+	DATA_GENERIC,
+	META_GENERIC,
};

/* for the list of ino */
@@ -2599,6 +2601,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
	spin_unlock(&sbi->iostat_lock);
}

+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META &&	\
+				(!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+					block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+					block_t blkaddr, int type)
+{
+	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"invalid blkaddr: %u, type: %d, run fsck to fix.",
+			blkaddr, type);
+		f2fs_bug_on(sbi, 1);
+	}
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+{
+	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+		return false;
+	return true;
+}
+
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+						block_t blkaddr)
+{
+	if (!__is_valid_data_blkaddr(blkaddr))
+		return false;
+	verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+	return true;
+}
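
/*
 * Summary of the helpers above: __is_valid_data_blkaddr() only rules out the
 * NEW_ADDR/NULL_ADDR magic values, f2fs_is_valid_blkaddr() additionally range
 * checks the address against the area selected by @type, and verify_blkaddr()
 * escalates a range failure to f2fs_bug_on().
 */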

/*
 * file.c
 */
@@ -2817,7 +2852,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+			block_t blkaddr, int type);
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
			int type, bool sync);
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);

@@ -345,13 +345,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
	return pgofs;
}

-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
-				int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+				pgoff_t dirty, pgoff_t pgofs, int whence)
{
	switch (whence) {
	case SEEK_DATA:
		if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
-			(blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+			is_valid_data_blkaddr(sbi, blkaddr))
			return true;
		break;
	case SEEK_HOLE:
@@ -414,7 +414,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
			blkaddr = datablock_addr(dn.inode,
					dn.node_page, dn.ofs_in_node);

-			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+			if (__is_valid_data_blkaddr(blkaddr) &&
+				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+						blkaddr, DATA_GENERIC)) {
+				f2fs_put_dnode(&dn);
+				goto fail;
+			}
+
+			if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+							pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
@@ -506,6 +514,11 @@ void truncate_data_blocks_range(struct dnode_of_data *dn, int count)

		dn->data_blkaddr = NULL_ADDR;
		set_data_blkaddr(dn);
+
+		if (__is_valid_data_blkaddr(blkaddr) &&
+			!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+			continue;
+
		invalidate_blocks(sbi, blkaddr);
		if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
			clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);

@@ -68,11 +68,12 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
	}
}

-static bool __written_first_block(struct f2fs_inode *ri)
+static bool __written_first_block(struct f2fs_sb_info *sbi,
+					struct f2fs_inode *ri)
{
	block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);

-	if (addr != NEW_ADDR && addr != NULL_ADDR)
+	if (is_valid_data_blkaddr(sbi, addr))
		return true;
	return false;
}
@@ -185,6 +186,72 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
	ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}

+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	unsigned long long iblocks;
+
+	iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+	if (!iblocks) {
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+			"run fsck to fix.",
+			__func__, inode->i_ino, iblocks);
+		return false;
+	}
+
+	if (ino_of_node(node_page) != nid_of_node(node_page)) {
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"%s: corrupted inode footer i_ino=%lx, ino,nid: "
+			"[%u, %u] run fsck to fix.",
+			__func__, inode->i_ino,
+			ino_of_node(node_page), nid_of_node(node_page));
+		return false;
+	}
+
+	if (f2fs_has_extra_attr(inode) &&
+			!f2fs_sb_has_extra_attr(sbi->sb)) {
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"%s: inode (ino=%lx) is with extra_attr, "
+			"but extra_attr feature is off",
+			__func__, inode->i_ino);
+		return false;
+	}
+
+	if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+			fi->i_extra_isize % sizeof(__le32)) {
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
+			"max: %zu",
+			__func__, inode->i_ino, fi->i_extra_isize,
+			F2FS_TOTAL_EXTRA_ATTR_SIZE);
+		return false;
+	}
+
+	if (F2FS_I(inode)->extent_tree) {
+		struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+		if (ei->len &&
+			(!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+			!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+							DATA_GENERIC))) {
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+			f2fs_msg(sbi->sb, KERN_WARNING,
+				"%s: inode (ino=%lx) extent info [%u, %u, %u] "
+				"is incorrect, run fsck to fix",
+				__func__, inode->i_ino,
+				ei->blk, ei->fofs, ei->len);
+			return false;
+		}
+	}
+	return true;
+}

static int do_read_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -234,21 +301,9 @@ static int do_read_inode(struct inode *inode)
	fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
			le16_to_cpu(ri->i_extra_isize) : 0;

-	if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
-		f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
-		fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
-	} else if (f2fs_has_inline_xattr(inode) ||
-			f2fs_has_inline_dentry(inode)) {
-		fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
-	} else {
-
-		/*
-		 * Previous inline data or directory always reserved 200 bytes
-		 * in inode layout, even if inline_xattr is disabled. In order
-		 * to keep inline_dentry's structure for backward compatibility,
-		 * we get the space back only from inline_data.
-		 */
-		fi->i_inline_xattr_size = 0;
+	if (!sanity_check_inode(inode, node_page)) {
+		f2fs_put_page(node_page, 1);
+		return -EINVAL;
	}

	/* check data exist */
@@ -258,7 +313,7 @@ static int do_read_inode(struct inode *inode)
	/* get rdev by using inline_info */
	__get_inode_rdev(inode, ri);

-	if (__written_first_block(ri))
+	if (__written_first_block(sbi, ri))
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);

	if (!need_inode_block_update(sbi, inode->i_ino))

@@ -379,8 +379,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			new_blkaddr == NULL_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
-	f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
-			nat_get_blkaddr(e) != NULL_ADDR &&
+	f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
@@ -391,7 +390,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
-	if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+	if (!is_valid_data_blkaddr(sbi, new_blkaddr))
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

@@ -1411,6 +1410,12 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
		return 0;
	}

+	if (__is_valid_data_blkaddr(ni.blk_addr) &&
+		!f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+		up_read(&sbi->node_write);
+		goto redirty_out;
+	}
+
	if (atomic && !test_opt(sbi, NOBARRIER))
		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;

@@ -255,7 +255,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
	while (1) {
		struct fsync_inode_entry *entry;

-		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = get_tmp_page(sbi, blkaddr);
@@ -509,7 +509,7 @@ retry_dn:
	}

	/* dest is valid block, try to recover from src to dest */
-	if (is_valid_blkaddr(sbi, dest, META_POR)) {
+	if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {

		if (src == NULL_ADDR) {
			err = reserve_new_block(&dn);
@@ -570,7 +570,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
	while (1) {
		struct fsync_inode_entry *entry;

-		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

@@ -1892,7 +1892,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
	struct seg_entry *se;
	bool is_cp = false;

-	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return true;

	down_read(&sit_i->sentry_lock);
@@ -2953,7 +2953,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *cpage;

-	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3671,6 +3671,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
+		if (start >= MAIN_SEGS(sbi)) {
+			f2fs_msg(sbi->sb, KERN_ERR,
+				"Wrong journal entry on segno %u",
+				start);
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+			err = -EINVAL;
+			break;
+		}
+
		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

@@ -85,7 +85,7 @@
	(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))

#define GET_SEGNO(sbi, blk_addr)					\
-	((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ?	\
+	((!is_valid_data_blkaddr(sbi, blk_addr)) ?			\
	NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),				\
		GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi)					\
@@ -645,13 +645,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
	struct f2fs_sb_info *sbi = fio->sbi;

-	if (PAGE_TYPE_OF_BIO(fio->type) == META &&
-		(!is_read_io(fio->op) || fio->is_meta))
-		BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
-				blk_addr >= MAIN_BLKADDR(sbi));
+	if (__is_meta_io(fio))
+		verify_blkaddr(sbi, blk_addr, META_GENERIC);
	else
-		BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
-				blk_addr >= MAX_BLKADDR(sbi));
+		verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}

/*
@@ -2152,6 +2152,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
				struct buffer_head *bh)
{
+	block_t segment_count, segs_per_sec, secs_per_zone;
+	block_t total_sections, blocks_per_seg;
	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
				(bh->b_data + F2FS_SUPER_OFFSET);
	struct super_block *sb = sbi->sb;
@@ -2208,6 +2210,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
		return 1;
	}

+	segment_count = le32_to_cpu(raw_super->segment_count);
+	segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+	secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+	total_sections = le32_to_cpu(raw_super->section_count);
+
+	/* blocks_per_seg should be 512, given the above check */
+	blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+
+	if (segment_count > F2FS_MAX_SEGMENT ||
+			segment_count < F2FS_MIN_SEGMENTS) {
+		f2fs_msg(sb, KERN_INFO,
+			"Invalid segment count (%u)",
+			segment_count);
+		return 1;
+	}
+
+	if (total_sections > segment_count ||
+			total_sections < F2FS_MIN_SEGMENTS ||
+			segs_per_sec > segment_count || !segs_per_sec) {
+		f2fs_msg(sb, KERN_INFO,
+			"Invalid segment/section count (%u, %u x %u)",
+			segment_count, total_sections, segs_per_sec);
+		return 1;
+	}
+
+	if ((segment_count / segs_per_sec) < total_sections) {
+		f2fs_msg(sb, KERN_INFO,
+			"Small segment_count (%u < %u * %u)",
+			segment_count, segs_per_sec, total_sections);
+		return 1;
+	}
+
+	if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
+		f2fs_msg(sb, KERN_INFO,
+			"Wrong segment_count / block_count (%u > %u)",
+			segment_count, le32_to_cpu(raw_super->block_count));
+		return 1;
+	}
+
+	if (secs_per_zone > total_sections || !secs_per_zone) {
+		f2fs_msg(sb, KERN_INFO,
+			"Wrong secs_per_zone / total_sections (%u, %u)",
+			secs_per_zone, total_sections);
+		return 1;
+	}
+	if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
+		f2fs_msg(sb, KERN_INFO,
+			"Corrupted extension count (%u > %u)",
+			le32_to_cpu(raw_super->extension_count),
+			F2FS_MAX_EXTENSION);
+		return 1;
+	}
+
+	if (le32_to_cpu(raw_super->cp_payload) >
+				(blocks_per_seg - F2FS_CP_PACKS)) {
+		f2fs_msg(sb, KERN_INFO,
+			"Insane cp_payload (%u > %u)",
+			le32_to_cpu(raw_super->cp_payload),
+			blocks_per_seg - F2FS_CP_PACKS);
+		return 1;
+	}
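
/*
 * Worked example for the checks above (hypothetical image): with
 * segment_count = 1024 and segs_per_sec = 2, total_sections can be at most
 * 1024 / 2 = 512; with blocks_per_seg = 512, cp_payload can be at most
 * 512 - F2FS_CP_PACKS.
 */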

	/* check reserved ino info */
	if (le32_to_cpu(raw_super->node_ino) != 1 ||
			le32_to_cpu(raw_super->meta_ino) != 2 ||
@@ -2220,13 +2284,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
		return 1;
	}

-	if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
-		f2fs_msg(sb, KERN_INFO,
-			"Invalid segment count (%u)",
-			le32_to_cpu(raw_super->segment_count));
-		return 1;
-	}
-
	/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
	if (sanity_check_area_boundary(sbi, bh))
		return 1;
@@ -2244,6 +2301,9 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
	unsigned int sit_segs, nat_segs;
	unsigned int sit_bitmap_size, nat_bitmap_size;
	unsigned int log_blocks_per_seg;
+	unsigned int segment_count_main;
+	unsigned int cp_pack_start_sum, cp_payload;
+	block_t user_block_count;
	int i;

	total = le32_to_cpu(raw_super->segment_count);
@@ -2268,6 +2328,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
		return 1;
	}

+	user_block_count = le64_to_cpu(ckpt->user_block_count);
+	segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+	log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+	if (!user_block_count || user_block_count >=
+			segment_count_main << log_blocks_per_seg) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"Wrong user_block_count: %u", user_block_count);
+		return 1;
+	}
+
	main_segs = le32_to_cpu(raw_super->segment_count_main);
	blocks_per_seg = sbi->blocks_per_seg;

@@ -2284,7 +2354,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)

	sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
	nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
-	log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);

	if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
		nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
@@ -2294,6 +2363,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
		return 1;
	}

+	cp_pack_start_sum = __start_sum_addr(sbi);
+	cp_payload = __cp_payload(sbi);
+	if (cp_pack_start_sum < cp_payload + 1 ||
+		cp_pack_start_sum > blocks_per_seg - 1 -
+			NR_CURSEG_TYPE) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"Wrong cp_pack_start_sum: %u",
+			cp_pack_start_sum);
+		return 1;
+	}
+
	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
		return 1;

@@ -501,7 +501,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
		if (args->flags & ATTR_CREATE)
			return retval;
		retval = xfs_attr_shortform_remove(args);
-		ASSERT(retval == 0);
+		if (retval)
+			return retval;
+		/*
+		 * Since we have removed the old attr, clear ATTR_REPLACE so
+		 * that the leaf format add routine won't trip over the attr
+		 * not being around.
+		 */
+		args->flags &= ~ATTR_REPLACE;
	}

	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||

@@ -113,6 +113,7 @@ struct bpf_insn_aux_data {
		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
	};
	int ctx_field_size;	/* the ctx field size for load insn, maybe 0 */
+	int sanitize_stack_off; /* stack slot to be cleared */
	bool seen; /* this insn was processed by the verifier */
};

include/linux/ceph/auth.h

@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
 	/* ensure that an existing authorizer is up to date */
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
+	int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
+					struct ceph_authorizer *a,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
 extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
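Connection owners (the mon/osd/mds clients) are expected to forward the new op
straight to the auth layer; a sketch of that glue, modeled on the OSD client
(the exact field paths are assumptions here):

	static int add_authorizer_challenge(struct ceph_connection *con,
					    void *challenge_buf,
					    int challenge_buf_len)
	{
		struct ceph_osd *o = con->private;
		struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;

		/* hand the server's cephx v2 challenge to the auth layer so
		 * it can be folded into a freshly signed authorizer */
		return ceph_auth_add_authorizer_challenge(ac,
					o->o_auth.authorizer,
					challenge_buf, challenge_buf_len);
	}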
include/linux/ceph/ceph_features.h

@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
 DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
 DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
 DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
 DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)	// *do not share this bit*
 DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2)		// *do not share this bit*

-DEFINE_CEPH_FEATURE(61, 1, RESERVED2)		// unused, but slow down!
 DEFINE_CEPH_FEATURE(62, 1, RESERVED)		// do not use; used as a sentinal
 DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing

@@ -209,7 +209,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
+	 CEPH_FEATURE_CEPHX_V2)

 #define CEPH_FEATURES_REQUIRED_DEFAULT	\
 	(CEPH_FEATURE_NOSRCADDR |	\
include/linux/ceph/messenger.h

@@ -31,6 +31,9 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 				int *proto, int force_new);
+	int (*add_authorizer_challenge)(struct ceph_connection *con,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);

@@ -203,9 +206,8 @@ struct ceph_connection {
 					 attempt for this connection, client */
 	u32 peer_global_seq;  /* peer's global seq for this connection */

+	struct ceph_auth_handshake *auth;
 	int auth_retry;       /* true if we need a newer authorizer */
-	void *auth_reply_buf;   /* where to put the authorizer reply */
-	int auth_reply_buf_len;

 	struct mutex mutex;
include/linux/ceph/msgr.h

@@ -91,7 +91,7 @@ struct ceph_entity_inst {
 #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
 #define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
 #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */

 /*
  * connection negotiation
include/linux/jump_label.h

@@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
 extern int jump_label_text_reserved(void *start, void *end);
 extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
+extern void static_key_slow_inc_cpuslocked(struct static_key *key);
+extern void static_key_slow_dec_cpuslocked(struct static_key *key);
 extern void jump_label_apply_nops(struct module *mod);
 extern int static_key_count(struct static_key *key);
 extern void static_key_enable(struct static_key *key);
@@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key)
 	atomic_dec(&key->enabled);
 }

+#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key)
+#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key)
+
 static inline int jump_label_text_reserved(void *start, void *end)
 {
 	return 0;
@@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void);

 #define static_branch_inc(x)		static_key_slow_inc(&(x)->key)
 #define static_branch_dec(x)		static_key_slow_dec(&(x)->key)
+#define static_branch_inc_cpuslocked(x)	static_key_slow_inc_cpuslocked(&(x)->key)
+#define static_branch_dec_cpuslocked(x)	static_key_slow_dec_cpuslocked(&(x)->key)

 /*
  * Normal usage; boolean enable/disable.
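The _cpuslocked variants are for callers that already hold the CPU hotplug
read lock, where the plain helpers would deadlock re-taking it; a usage
sketch (the key and function names are illustrative):

	#include <linux/cpu.h>
	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(demo_key);

	static void demo_toggle_under_hotplug_lock(bool on)
	{
		cpus_read_lock();
		/* ... hotplug-sensitive setup ... */
		if (on)
			static_branch_inc_cpuslocked(&demo_key);
		else
			static_branch_dec_cpuslocked(&demo_key);
		cpus_read_unlock();
	}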
include/linux/ptrace.h

@@ -62,8 +62,8 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
 #define PTRACE_MODE_ATTACH	0x02
 #define PTRACE_MODE_NOAUDIT	0x04
-#define PTRACE_MODE_FSCREDS 0x08
-#define PTRACE_MODE_REALCREDS 0x10
+#define PTRACE_MODE_FSCREDS	0x08
+#define PTRACE_MODE_REALCREDS	0x10

 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS	(PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
include/linux/sched.h

@@ -1492,6 +1492,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
 #define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
 #define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */

 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1523,6 +1525,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)

+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
include/linux/sched/smt.h (new file)

@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SMT_H
+#define _LINUX_SCHED_SMT_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_SCHED_SMT
+extern struct static_key_false sched_smt_present;
+
+static __always_inline bool sched_smt_active(void)
+{
+	return static_branch_likely(&sched_smt_present);
+}
+#else
+static inline bool sched_smt_active(void) { return false; }
+#endif
+
+void arch_smt_update(void);
+
+#endif
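sched_smt_active() gives architecture code a static-key-backed test for "any
SMT siblings currently online", and arch_smt_update() is the hook through
which mitigations are re-evaluated on SMT hotplug (x86's real override lives
in arch/x86/kernel/cpu/bugs.c). A sketch of such an override, with an
illustrative body:

	#include <linux/printk.h>
	#include <linux/sched/smt.h>

	void arch_smt_update(void)
	{
		/* re-program an SMT-sensitive mitigation whenever siblings
		 * come or go; the body here is illustrative only */
		if (sched_smt_active())
			pr_debug("SMT active: tightening sibling mitigation\n");
		else
			pr_debug("SMT inactive: relaxing sibling mitigation\n");
	}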
include/linux/skbuff.h

@@ -1288,6 +1288,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 	}
 }

+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
+{
+	skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
+	skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+}
+
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
+{
+	return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
+}
+
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
+{
+	return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
+}
+
 /* Release a reference on a zerocopy structure */
 static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 {
@@ -1297,7 +1313,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 		if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else {
+		} else if (!skb_zcopy_is_nouarg(skb)) {
 			uarg->callback(uarg, zerocopy);
 		}
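The nouarg helpers tag destructor_arg with an arbitrary pointer-sized cookie
instead of a refcounted ubuf_info, using bit 0 as the discriminator (the
stored pointer is word-aligned, so bit 0 is free); af_packet uses this to
remember which tx_ring frame a zero-copied skb came from. A usage sketch
(function names are illustrative):

	#include <linux/skbuff.h>

	/* stash a frame cookie on the skb at transmit time ... */
	static void demo_tag_skb(struct sk_buff *skb, void *frame)
	{
		skb_zcopy_set_nouarg(skb, frame);
	}

	/* ... and recover it on completion, where a real ubuf_info must
	 * not be assumed */
	static void *demo_untag_skb(struct sk_buff *skb)
	{
		if (skb_zcopy_is_nouarg(skb))
			return skb_zcopy_get_nouarg(skb);
		return NULL;
	}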
include/net/tls.h

@@ -89,6 +89,8 @@ struct tls_context {

 	void *priv_ctx;

+	u8 tx_conf:2;
+
 	u16 prepend_size;
 	u16 tag_size;
 	u16 overhead_size;
@@ -104,7 +106,6 @@ struct tls_context {

 	u16 pending_open_record_frags;
 	int (*push_pending_record)(struct sock *sk, int flags);
-	void (*free_resources)(struct sock *sk);

 	void (*sk_write_space)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
@@ -129,6 +130,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
+void tls_sw_free_tx_resources(struct sock *sk);

 void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
 void tls_icsk_clean_acked(struct sock *sk);
fs/btrfs/ctree.h

@@ -734,6 +734,7 @@ struct btrfs_balance_item {
 #define BTRFS_FILE_EXTENT_INLINE 0
 #define BTRFS_FILE_EXTENT_REG 1
 #define BTRFS_FILE_EXTENT_PREALLOC 2
+#define BTRFS_FILE_EXTENT_TYPES	2

 struct btrfs_file_extent_item {
 	/*
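The new constant is consumed by the EXTENT_DATA checker added elsewhere in
this series; roughly (a sketch of the check in fs/btrfs/tree-checker.c):

	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
		file_extent_err(root, leaf, slot,
	"invalid type for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_type(leaf, fi),
			BTRFS_FILE_EXTENT_TYPES);
		return -EUCLEAN;
	}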
include/uapi/linux/prctl.h

@@ -203,6 +203,7 @@ struct prctl_mm_map {
 #define PR_SET_SPECULATION_CTRL		53
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
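From userspace the new variant is driven through the existing
speculation-control prctl; a minimal sketch that restricts indirect-branch
speculation for the calling task (the guards cover older uapi headers):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_SET_SPECULATION_CTRL
	#define PR_SET_SPECULATION_CTRL	53
	#endif
	#ifndef PR_SPEC_INDIRECT_BRANCH
	#define PR_SPEC_INDIRECT_BRANCH	1
	#endif
	#ifndef PR_SPEC_DISABLE
	#define PR_SPEC_DISABLE		(1UL << 2)
	#endif

	int main(void)
	{
		/* kernels without the feature fail with EINVAL/ENODEV */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
			  PR_SPEC_DISABLE, 0, 0))
			perror("prctl");
		return 0;
	}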
kernel/bpf/verifier.c

@@ -717,8 +717,9 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
-static int check_stack_write(struct bpf_verifier_state *state, int off,
-			     int size, int value_regno)
+static int check_stack_write(struct bpf_verifier_env *env,
+			     struct bpf_verifier_state *state, int off,
+			     int size, int value_regno, int insn_idx)
 {
 	int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -738,8 +739,32 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
 		state->spilled_regs[spi] = state->regs[value_regno];
 		state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;

-		for (i = 0; i < BPF_REG_SIZE; i++)
+		for (i = 0; i < BPF_REG_SIZE; i++) {
+			if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
+			    !env->allow_ptr_leaks) {
+				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+				int soff = (-spi - 1) * BPF_REG_SIZE;
+
+				/* detected reuse of integer stack slot with a pointer
+				 * which means either llvm is reusing stack slot or
+				 * an attacker is trying to exploit CVE-2018-3639
+				 * (speculative store bypass)
+				 * Have to sanitize that slot with preemptive
+				 * store of zero.
+				 */
+				if (*poff && *poff != soff) {
+					/* disallow programs where single insn stores
+					 * into two different stack slots, since verifier
+					 * cannot sanitize them
+					 */
+					verbose("insn %d cannot access two stack slots fp%d and fp%d",
+						insn_idx, *poff, soff);
+					return -EINVAL;
+				}
+				*poff = soff;
+			}
 			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+		}
 	} else {
 		/* regular write of data into stack */
 		state->spilled_regs[spi] = (struct bpf_reg_state) {};
@@ -1216,7 +1241,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
 			verbose("attempt to corrupt spilled pointer on stack\n");
 			return -EACCES;
 		}
-		err = check_stack_write(state, off, size, value_regno);
+		err = check_stack_write(env, state, off, size,
+					value_regno, insn_idx);
 	} else {
 		err = check_stack_read(state, off, size, value_regno);
 	}
@@ -4270,6 +4296,34 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		else
 			continue;

+		if (type == BPF_WRITE &&
+		    env->insn_aux_data[i + delta].sanitize_stack_off) {
+			struct bpf_insn patch[] = {
+				/* Sanitize suspicious stack slot with zero.
+				 * There are no memory dependencies for this store,
+				 * since it's only using frame pointer and immediate
+				 * constant of zero
+				 */
+				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+					   env->insn_aux_data[i + delta].sanitize_stack_off,
+					   0),
+				/* the original STX instruction will immediately
+				 * overwrite the same stack slot with appropriate value
+				 */
+				*insn,
+			};
+
+			cnt = ARRAY_SIZE(patch);
+			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
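The pattern being defended against is a stack slot that first holds
attacker-controlled scalar data and is then reused for a pointer spill: under
speculative store bypass, a later fill may transiently observe the stale
scalar. A sketch of such a fragment using the kernel's insn macros (the
surrounding program is omitted):

	#include <linux/filter.h>

	struct bpf_insn demo[] = {
		/* fp-8 first written with a scalar ... */
		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x42),
		/* ... then reused to spill a pointer; the verifier now
		 * records sanitize_stack_off = -8 for this insn, and
		 * convert_ctx_accesses() prefixes it with
		 * BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0) */
		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
		BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
		BPF_EXIT_INSN(),
	};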
kernel/cpu.c

@@ -10,6 +10,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
 #include <linux/oom.h>
@@ -347,6 +348,12 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif	/* CONFIG_HOTPLUG_CPU */

+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
@@ -998,6 +1005,7 @@ out:
 	 * concurrent CPU hotplug via cpu_add_remove_lock.
 	 */
 	lockup_detector_cleanup();
+	arch_smt_update();
 	return ret;
 }

@@ -1126,6 +1134,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	ret = cpuhp_up_callbacks(cpu, st, target);
 out:
 	cpus_write_unlock();
+	arch_smt_update();
 	return ret;
 }

@@ -2078,8 +2087,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 		 */
 		cpuhp_offline_cpu_device(cpu);
 	}
-	if (!ret)
+	if (!ret) {
 		cpu_smt_control = ctrlval;
+		arch_smt_update();
+	}
 	cpu_maps_update_done();
 	return ret;
 }
@@ -2090,6 +2101,7 @@ static int cpuhp_smt_enable(void)

 	cpu_maps_update_begin();
 	cpu_smt_control = CPU_SMT_ENABLED;
+	arch_smt_update();
 	for_each_present_cpu(cpu) {
 		/* Skip online CPUs and CPUs on offline nodes */
 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
kernel/jump_label.c

@@ -79,7 +79,7 @@ int static_key_count(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_count);

-static void static_key_slow_inc_cpuslocked(struct static_key *key)
+void static_key_slow_inc_cpuslocked(struct static_key *key)
 {
 	int v, v1;

@@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_disable);

-static void static_key_slow_dec_cpuslocked(struct static_key *key,
+static void __static_key_slow_dec_cpuslocked(struct static_key *key,
 					   unsigned long rate_limit,
 					   struct delayed_work *work)
 {
@@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key,
 				  struct delayed_work *work)
 {
 	cpus_read_lock();
-	static_key_slow_dec_cpuslocked(key, rate_limit, work);
+	__static_key_slow_dec_cpuslocked(key, rate_limit, work);
 	cpus_read_unlock();
 }

@@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec);

+void static_key_slow_dec_cpuslocked(struct static_key *key)
+{
+	STATIC_KEY_CHECK_USE();
+	__static_key_slow_dec_cpuslocked(key, 0, NULL);
+}
+
 void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
 	STATIC_KEY_CHECK_USE();
kernel/sched/core.c

@@ -5680,15 +5680,10 @@ int sched_cpu_activate(unsigned int cpu)

 #ifdef CONFIG_SCHED_SMT
 	/*
-	 * The sched_smt_present static key needs to be evaluated on every
-	 * hotplug event because at boot time SMT might be disabled when
-	 * the number of booted CPUs is limited.
-	 *
-	 * If then later a sibling gets hotplugged, then the key would stay
-	 * off and SMT scheduling would never be functional.
+	 * When going up, increment the number of cores with SMT present.
 	 */
-	if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
-		static_branch_enable_cpuslocked(&sched_smt_present);
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_inc_cpuslocked(&sched_smt_present);
 #endif
 	set_cpu_active(cpu, true);

@@ -5732,6 +5727,14 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 */
 	synchronize_rcu_mult(call_rcu, call_rcu_sched);

+#ifdef CONFIG_SCHED_SMT
+	/*
+	 * When going down, decrement the number of cores with SMT present.
+	 */
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+
 	if (!sched_smp_initialized)
 		return 0;
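With inc/dec the key behaves as a count of SMT-capable cores rather than a
one-way switch: it rises when a core's second sibling comes online and falls
when one of a pair goes away, so it only turns off once the last SMT pair is
gone. A toy model of those semantics (plain userspace C, illustrative only):

	#include <assert.h>

	static int smt_cores;		/* models the key's refcount */

	static void sibling_up(int weight)	/* weight after onlining */
	{
		if (weight == 2)
			smt_cores++;	/* static_branch_inc_cpuslocked() */
	}

	static void sibling_down(int weight)	/* weight before offlining */
	{
		if (weight == 2)
			smt_cores--;	/* static_branch_dec_cpuslocked() */
	}

	int main(void)
	{
		sibling_up(2);		/* core 0 gains its sibling */
		sibling_up(2);		/* core 1 gains its sibling */
		sibling_down(2);	/* core 0 loses a sibling */
		assert(smt_cores == 1);	/* SMT still present on core 1 */
		return 0;
	}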
kernel/sched/fair.c

@@ -4311,12 +4311,12 @@ static inline bool cfs_bandwidth_used(void)

 void cfs_bandwidth_usage_inc(void)
 {
-	static_key_slow_inc(&__cfs_bandwidth_used);
+	static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
 }

 void cfs_bandwidth_usage_dec(void)
 {
-	static_key_slow_dec(&__cfs_bandwidth_used);
+	static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
 }
 #else /* HAVE_JUMP_LABEL */
 static bool cfs_bandwidth_used(void)
kernel/sched/sched.h

@@ -20,6 +20,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/sched/cputime.h>
 #include <linux/sched/init.h>
+#include <linux/sched/smt.h>

 #include <linux/u64_stats_sync.h>
 #include <linux/kernel_stat.h>
@@ -865,9 +866,6 @@ static inline int cpu_of(struct rq *rq)


 #ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
 extern void __update_idle_core(struct rq *rq);

 static inline void update_idle_core(struct rq *rq)
(Some files were not shown because too many files have changed in this diff.)