[ALPS05012802] [Do NOT Sync]Merge branch android-4.14 into alps-trunk-r0.basic
[Detail] Parent:54236917c6Merge 4.14.141 into android-4.14 Start:a9d0871a56ANDROID: fiq_debugger: remove Target:234de92896Merge 4.14.150 into android-4.14 MTK-Commit-Id: 2787c355dfe4c7cb54fdbdb5b5099245ff4502b3 Feature: Others Change-Id: I77184a925c03fcfd29597e0fbf526580e346f90e CR-Id: ALPS05012802 Signed-off-by: Breeze.Li <breeze.li@mediatek.com>
This commit is contained in:
76
Documentation/ABI/testing/sysfs-class-wakeup
Normal file
76
Documentation/ABI/testing/sysfs-class-wakeup
Normal file
@@ -0,0 +1,76 @@
|
||||
What: /sys/class/wakeup/
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
The /sys/class/wakeup/ directory contains pointers to all
|
||||
wakeup sources in the kernel at that moment in time.
|
||||
|
||||
What: /sys/class/wakeup/.../name
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the name of the wakeup source.
|
||||
|
||||
What: /sys/class/wakeup/.../active_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source was
|
||||
activated.
|
||||
|
||||
What: /sys/class/wakeup/.../event_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of signaled wakeup events
|
||||
associated with the wakeup source.
|
||||
|
||||
What: /sys/class/wakeup/.../wakeup_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source might
|
||||
abort suspend.
|
||||
|
||||
What: /sys/class/wakeup/.../expire_count
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the number of times the wakeup source's
|
||||
timeout has expired.
|
||||
|
||||
What: /sys/class/wakeup/.../active_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the amount of time the wakeup source has
|
||||
been continuously active, in milliseconds. If the wakeup
|
||||
source is not active, this file contains '0'.
|
||||
|
||||
What: /sys/class/wakeup/.../total_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the total amount of time this wakeup source
|
||||
has been active, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../max_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the maximum amount of time this wakeup
|
||||
source has been continuously active, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../last_change_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
This file contains the monotonic clock time when the wakeup
|
||||
source was touched last time, in milliseconds.
|
||||
|
||||
What: /sys/class/wakeup/.../prevent_suspend_time_ms
|
||||
Date: June 2019
|
||||
Contact: Tri Vo <trong@android.com>
|
||||
Description:
|
||||
The file contains the total amount of time this wakeup source
|
||||
has been preventing autosleep, in milliseconds.
|
||||
@@ -243,3 +243,18 @@ Description:
|
||||
- Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list
|
||||
- [h] means add/del hot file extension
|
||||
- [c] means add/del cold file extension
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/unusable
|
||||
Date April 2019
|
||||
Contact: "Daniel Rosenberg" <drosen@google.com>
|
||||
Description:
|
||||
If checkpoint=disable, it displays the number of blocks that are unusable.
|
||||
If checkpoint=enable it displays the enumber of blocks that would be unusable
|
||||
if checkpoint=disable were to be set.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/encoding
|
||||
Date July 2019
|
||||
Contact: "Daniel Rosenberg" <drosen@google.com>
|
||||
Description:
|
||||
Displays name and version of the encoding set for the filesystem.
|
||||
If no encoding is set, displays (none)
|
||||
|
||||
@@ -285,3 +285,123 @@ Description:
|
||||
Writing a "1" to this file enables the debug messages and
|
||||
writing a "0" (default) to it disables them. Reads from
|
||||
this file return the current value.
|
||||
|
||||
What: /sys/power/resume_offset
|
||||
Date: April 2018
|
||||
Contact: Mario Limonciello <mario.limonciello@dell.com>
|
||||
Description:
|
||||
This file is used for telling the kernel an offset into a disk
|
||||
to use when hibernating the system such as with a swap file.
|
||||
|
||||
Reads from this file will display the current offset
|
||||
the kernel will be using on the next hibernation
|
||||
attempt.
|
||||
|
||||
Using this sysfs file will override any values that were
|
||||
set using the kernel command line for disk offset.
|
||||
|
||||
What: /sys/power/suspend_stats
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats directory contains suspend related
|
||||
statistics.
|
||||
|
||||
What: /sys/power/suspend_stats/success
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/success file contains the number
|
||||
of times entering system sleep state succeeded.
|
||||
|
||||
What: /sys/power/suspend_stats/fail
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/fail file contains the number
|
||||
of times entering system sleep state failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_freeze
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_freeze file contains the
|
||||
number of times freezing processes failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_prepare
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_prepare file contains the
|
||||
number of times preparing all non-sysdev devices for
|
||||
a system PM transition failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume file contains the
|
||||
number of times executing "resume" callbacks of
|
||||
non-sysdev devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume_early
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume_early file contains
|
||||
the number of times executing "early resume" callbacks
|
||||
of devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_resume_noirq
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_resume_noirq file contains
|
||||
the number of times executing "noirq resume" callbacks
|
||||
of devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend file contains
|
||||
the number of times executing "suspend" callbacks
|
||||
of all non-sysdev devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend_late
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend_late file contains
|
||||
the number of times executing "late suspend" callbacks
|
||||
of all devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/failed_suspend_noirq
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/failed_suspend_noirq file contains
|
||||
the number of times executing "noirq suspend" callbacks
|
||||
of all devices failed.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_dev
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_dev file contains
|
||||
the last device for which a suspend/resume callback failed.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_errno
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_errno file contains
|
||||
the errno of the last failed attempt at entering
|
||||
system sleep state.
|
||||
|
||||
What: /sys/power/suspend_stats/last_failed_step
|
||||
Date: July 2019
|
||||
Contact: Kalesh Singh <kaleshsingh96@gmail.com>
|
||||
Description:
|
||||
The /sys/power/suspend_stats/last_failed_step file contains
|
||||
the last failed step in the suspend/resume path.
|
||||
|
||||
@@ -1568,6 +1568,15 @@
|
||||
|
||||
initrd= [BOOT] Specify the location of the initial ramdisk
|
||||
|
||||
init_on_alloc= [MM] Fill newly allocated pages and heap objects with
|
||||
zeroes.
|
||||
Format: 0 | 1
|
||||
Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
|
||||
|
||||
init_on_free= [MM] Fill freed pages and heap objects with zeroes.
|
||||
Format: 0 | 1
|
||||
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
|
||||
|
||||
init_pkru= [x86] Specify the default memory protection keys rights
|
||||
register contents for all processes. 0x55555554 by
|
||||
default (disallow access to all but pkey 0). Can
|
||||
|
||||
156
Documentation/arm64/tagged-address-abi.rst
Normal file
156
Documentation/arm64/tagged-address-abi.rst
Normal file
@@ -0,0 +1,156 @@
|
||||
==========================
|
||||
AArch64 TAGGED ADDRESS ABI
|
||||
==========================
|
||||
|
||||
Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Catalin Marinas <catalin.marinas@arm.com>
|
||||
|
||||
Date: 21 August 2019
|
||||
|
||||
This document describes the usage and semantics of the Tagged Address
|
||||
ABI on AArch64 Linux.
|
||||
|
||||
1. Introduction
|
||||
---------------
|
||||
|
||||
On AArch64 the ``TCR_EL1.TBI0`` bit is set by default, allowing
|
||||
userspace (EL0) to perform memory accesses through 64-bit pointers with
|
||||
a non-zero top byte. This document describes the relaxation of the
|
||||
syscall ABI that allows userspace to pass certain tagged pointers to
|
||||
kernel syscalls.
|
||||
|
||||
2. AArch64 Tagged Address ABI
|
||||
-----------------------------
|
||||
|
||||
From the kernel syscall interface perspective and for the purposes of
|
||||
this document, a "valid tagged pointer" is a pointer with a potentially
|
||||
non-zero top-byte that references an address in the user process address
|
||||
space obtained in one of the following ways:
|
||||
|
||||
- ``mmap()`` syscall where either:
|
||||
|
||||
- flags have the ``MAP_ANONYMOUS`` bit set or
|
||||
- the file descriptor refers to a regular file (including those
|
||||
returned by ``memfd_create()``) or ``/dev/zero``
|
||||
|
||||
- ``brk()`` syscall (i.e. the heap area between the initial location of
|
||||
the program break at process creation and its current location).
|
||||
|
||||
- any memory mapped by the kernel in the address space of the process
|
||||
during creation and with the same restrictions as for ``mmap()`` above
|
||||
(e.g. data, bss, stack).
|
||||
|
||||
The AArch64 Tagged Address ABI has two stages of relaxation depending
|
||||
how the user addresses are used by the kernel:
|
||||
|
||||
1. User addresses not accessed by the kernel but used for address space
|
||||
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
|
||||
of valid tagged pointers in this context is always allowed.
|
||||
|
||||
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
||||
relaxation is disabled by default and the application thread needs to
|
||||
explicitly enable it via ``prctl()`` as follows:
|
||||
|
||||
- ``PR_SET_TAGGED_ADDR_CTRL``: enable or disable the AArch64 Tagged
|
||||
Address ABI for the calling thread.
|
||||
|
||||
The ``(unsigned int) arg2`` argument is a bit mask describing the
|
||||
control mode used:
|
||||
|
||||
- ``PR_TAGGED_ADDR_ENABLE``: enable AArch64 Tagged Address ABI.
|
||||
Default status is disabled.
|
||||
|
||||
Arguments ``arg3``, ``arg4``, and ``arg5`` must be 0.
|
||||
|
||||
- ``PR_GET_TAGGED_ADDR_CTRL``: get the status of the AArch64 Tagged
|
||||
Address ABI for the calling thread.
|
||||
|
||||
Arguments ``arg2``, ``arg3``, ``arg4``, and ``arg5`` must be 0.
|
||||
|
||||
The ABI properties described above are thread-scoped, inherited on
|
||||
clone() and fork() and cleared on exec().
|
||||
|
||||
Calling ``prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)``
|
||||
returns ``-EINVAL`` if the AArch64 Tagged Address ABI is globally
|
||||
disabled by ``sysctl abi.tagged_addr_disabled=1``. The default
|
||||
``sysctl abi.tagged_addr_disabled`` configuration is 0.
|
||||
|
||||
When the AArch64 Tagged Address ABI is enabled for a thread, the
|
||||
following behaviours are guaranteed:
|
||||
|
||||
- All syscalls except the cases mentioned in section 3 can accept any
|
||||
valid tagged pointer.
|
||||
|
||||
- The syscall behaviour is undefined for invalid tagged pointers: it may
|
||||
result in an error code being returned, a (fatal) signal being raised,
|
||||
or other modes of failure.
|
||||
|
||||
- The syscall behaviour for a valid tagged pointer is the same as for
|
||||
the corresponding untagged pointer.
|
||||
|
||||
|
||||
A definition of the meaning of tagged pointers on AArch64 can be found
|
||||
in Documentation/arm64/tagged-pointers.rst.
|
||||
|
||||
3. AArch64 Tagged Address ABI Exceptions
|
||||
-----------------------------------------
|
||||
|
||||
The following system call parameters must be untagged regardless of the
|
||||
ABI relaxation:
|
||||
|
||||
- ``prctl()`` other than pointers to user data either passed directly or
|
||||
indirectly as arguments to be accessed by the kernel.
|
||||
|
||||
- ``ioctl()`` other than pointers to user data either passed directly or
|
||||
indirectly as arguments to be accessed by the kernel.
|
||||
|
||||
- ``shmat()`` and ``shmdt()``.
|
||||
|
||||
Any attempt to use non-zero tagged pointers may result in an error code
|
||||
being returned, a (fatal) signal being raised, or other modes of
|
||||
failure.
|
||||
|
||||
4. Example of correct usage
|
||||
---------------------------
|
||||
.. code-block:: c
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#define PR_SET_TAGGED_ADDR_CTRL 55
|
||||
#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
|
||||
|
||||
#define TAG_SHIFT 56
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int tbi_enabled = 0;
|
||||
unsigned long tag = 0;
|
||||
char *ptr;
|
||||
|
||||
/* check/enable the tagged address ABI */
|
||||
if (!prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0))
|
||||
tbi_enabled = 1;
|
||||
|
||||
/* memory allocation */
|
||||
ptr = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (ptr == MAP_FAILED)
|
||||
return 1;
|
||||
|
||||
/* set a non-zero tag if the ABI is available */
|
||||
if (tbi_enabled)
|
||||
tag = rand() & 0xff;
|
||||
ptr = (char *)((unsigned long)ptr | (tag << TAG_SHIFT));
|
||||
|
||||
/* memory access to a tagged address */
|
||||
strcpy(ptr, "tagged pointer\n");
|
||||
|
||||
/* syscall with a tagged pointer */
|
||||
write(1, ptr, strlen(ptr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -18,7 +18,9 @@ Passing tagged addresses to the kernel
|
||||
--------------------------------------
|
||||
|
||||
All interpretation of userspace memory addresses by the kernel assumes
|
||||
an address tag of 0x00.
|
||||
an address tag of 0x00, unless the application enables the AArch64
|
||||
Tagged Address ABI explicitly
|
||||
(Documentation/arm64/tagged-address-abi.rst).
|
||||
|
||||
This includes, but is not limited to, addresses found in:
|
||||
|
||||
@@ -31,13 +33,15 @@ This includes, but is not limited to, addresses found in:
|
||||
- the frame pointer (x29) and frame records, e.g. when interpreting
|
||||
them to generate a backtrace or call graph.
|
||||
|
||||
Using non-zero address tags in any of these locations may result in an
|
||||
error code being returned, a (fatal) signal being raised, or other modes
|
||||
of failure.
|
||||
Using non-zero address tags in any of these locations when the
|
||||
userspace application did not enable the AArch64 Tagged Address ABI may
|
||||
result in an error code being returned, a (fatal) signal being raised,
|
||||
or other modes of failure.
|
||||
|
||||
For these reasons, passing non-zero address tags to the kernel via
|
||||
system calls is forbidden, and using a non-zero address tag for sp is
|
||||
strongly discouraged.
|
||||
For these reasons, when the AArch64 Tagged Address ABI is disabled,
|
||||
passing non-zero address tags to the kernel via system calls is
|
||||
forbidden, and using a non-zero address tag for sp is strongly
|
||||
discouraged.
|
||||
|
||||
Programs maintaining a frame pointer and frame records that use non-zero
|
||||
address tags may suffer impaired or inaccurate debug and profiling
|
||||
@@ -57,6 +61,9 @@ be preserved.
|
||||
The architecture prevents the use of a tagged PC, so the upper byte will
|
||||
be set to a sign-extension of bit 55 on exception return.
|
||||
|
||||
This behaviour is maintained when the AArch64 Tagged Address ABI is
|
||||
enabled.
|
||||
|
||||
|
||||
Other considerations
|
||||
--------------------
|
||||
|
||||
@@ -34,10 +34,6 @@ Configure the kernel with::
|
||||
CONFIG_DEBUG_FS=y
|
||||
CONFIG_GCOV_KERNEL=y
|
||||
|
||||
select the gcc's gcov format, default is autodetect based on gcc version::
|
||||
|
||||
CONFIG_GCOV_FORMAT_AUTODETECT=y
|
||||
|
||||
and to get coverage data for the entire kernel::
|
||||
|
||||
CONFIG_GCOV_PROFILE_ALL=y
|
||||
@@ -169,6 +165,20 @@ b) gcov is run on the BUILD machine
|
||||
[user@build] gcov -o /tmp/coverage/tmp/out/init main.c
|
||||
|
||||
|
||||
Note on compilers
|
||||
-----------------
|
||||
|
||||
GCC and LLVM gcov tools are not necessarily compatible. Use gcov_ to work with
|
||||
GCC-generated .gcno and .gcda files, and use llvm-cov_ for Clang.
|
||||
|
||||
.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
|
||||
.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
|
||||
|
||||
Build differences between GCC and Clang gcov are handled by Kconfig. It
|
||||
automatically selects the appropriate gcov format depending on the detected
|
||||
toolchain.
|
||||
|
||||
|
||||
Troubleshooting
|
||||
---------------
|
||||
|
||||
|
||||
@@ -4,15 +4,25 @@ The Kernel Address Sanitizer (KASAN)
|
||||
Overview
|
||||
--------
|
||||
|
||||
KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides
|
||||
a fast and comprehensive solution for finding use-after-free and out-of-bounds
|
||||
bugs.
|
||||
KernelAddressSANitizer (KASAN) is a dynamic memory error detector designed to
|
||||
find out-of-bound and use-after-free bugs. KASAN has two modes: generic KASAN
|
||||
(similar to userspace ASan) and software tag-based KASAN (similar to userspace
|
||||
HWASan).
|
||||
|
||||
KASAN uses compile-time instrumentation for checking every memory access,
|
||||
therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
|
||||
required for detection of out-of-bounds accesses to stack or global variables.
|
||||
KASAN uses compile-time instrumentation to insert validity checks before every
|
||||
memory access, and therefore requires a compiler version that supports that.
|
||||
|
||||
Currently KASAN is supported only for the x86_64 and arm64 architectures.
|
||||
Generic KASAN is supported in both GCC and Clang. With GCC it requires version
|
||||
4.9.2 or later for basic support and version 5.0 or later for detection of
|
||||
out-of-bounds accesses for stack and global variables and for inline
|
||||
instrumentation mode (see the Usage section). With Clang it requires version
|
||||
7.0.0 or later and it doesn't support detection of out-of-bounds accesses for
|
||||
global variables yet.
|
||||
|
||||
Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later.
|
||||
|
||||
Currently generic KASAN is supported for the x86_64, arm64 architectures,
|
||||
and tag-based KASAN is supported only for arm64.
|
||||
|
||||
Usage
|
||||
-----
|
||||
@@ -21,12 +31,14 @@ To enable KASAN configure kernel with::
|
||||
|
||||
CONFIG_KASAN = y
|
||||
|
||||
and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and
|
||||
inline are compiler instrumentation types. The former produces smaller binary
|
||||
the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
|
||||
version 5.0 or later.
|
||||
and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN) and
|
||||
CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN).
|
||||
|
||||
KASAN works with both SLUB and SLAB memory allocators.
|
||||
You also need to choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE.
|
||||
Outline and inline are compiler instrumentation types. The former produces
|
||||
smaller binary while the latter is 1.1 - 2 times faster.
|
||||
|
||||
Both KASAN modes work with both SLUB and SLAB memory allocators.
|
||||
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
|
||||
|
||||
To disable instrumentation for specific files or directories, add a line
|
||||
@@ -43,85 +55,85 @@ similar to the following to the respective kernel Makefile:
|
||||
Error reports
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
A typical out of bounds access report looks like this::
|
||||
A typical out-of-bounds access generic KASAN report looks like this::
|
||||
|
||||
==================================================================
|
||||
BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
|
||||
Write of size 1 by task modprobe/1689
|
||||
=============================================================================
|
||||
BUG kmalloc-128 (Not tainted): kasan error
|
||||
-----------------------------------------------------------------------------
|
||||
BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
|
||||
Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
|
||||
|
||||
Disabling lock debugging due to kernel taint
|
||||
INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
|
||||
__slab_alloc+0x4b4/0x4f0
|
||||
kmem_cache_alloc_trace+0x10b/0x190
|
||||
kmalloc_oob_right+0x3d/0x75 [test_kasan]
|
||||
init_module+0x9/0x47 [test_kasan]
|
||||
do_one_initcall+0x99/0x200
|
||||
load_module+0x2cb3/0x3b20
|
||||
SyS_finit_module+0x76/0x80
|
||||
system_call_fastpath+0x12/0x17
|
||||
INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
|
||||
INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
|
||||
|
||||
Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
|
||||
Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
|
||||
Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
|
||||
Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........
|
||||
Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
|
||||
CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
|
||||
ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
|
||||
ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
|
||||
ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
|
||||
CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
|
||||
Call Trace:
|
||||
[<ffffffff81cc68ae>] dump_stack+0x46/0x58
|
||||
[<ffffffff811fd848>] print_trailer+0xf8/0x160
|
||||
[<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
|
||||
[<ffffffff811ff0f5>] object_err+0x35/0x40
|
||||
[<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
|
||||
[<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
|
||||
[<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
|
||||
[<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
|
||||
[<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
|
||||
[<ffffffff8120a995>] __asan_store1+0x75/0xb0
|
||||
[<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
|
||||
[<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
|
||||
[<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
|
||||
[<ffffffff810002d9>] do_one_initcall+0x99/0x200
|
||||
[<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
|
||||
[<ffffffff81114f63>] load_module+0x2cb3/0x3b20
|
||||
[<ffffffff8110fd70>] ? m_show+0x240/0x240
|
||||
[<ffffffff81115f06>] SyS_finit_module+0x76/0x80
|
||||
[<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
|
||||
dump_stack+0x94/0xd8
|
||||
print_address_description+0x73/0x280
|
||||
kasan_report+0x144/0x187
|
||||
__asan_report_store1_noabort+0x17/0x20
|
||||
kmalloc_oob_right+0xa8/0xbc [test_kasan]
|
||||
kmalloc_tests_init+0x16/0x700 [test_kasan]
|
||||
do_one_initcall+0xa5/0x3ae
|
||||
do_init_module+0x1b6/0x547
|
||||
load_module+0x75df/0x8070
|
||||
__do_sys_init_module+0x1c6/0x200
|
||||
__x64_sys_init_module+0x6e/0xb0
|
||||
do_syscall_64+0x9f/0x2c0
|
||||
entry_SYSCALL_64_after_hwframe+0x44/0xa9
|
||||
RIP: 0033:0x7f96443109da
|
||||
RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
|
||||
RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
|
||||
RDX: 00007f96445cff88 RSI: 0000000000057a50 RDI: 00007f9644992000
|
||||
RBP: 000055dc3ee510b0 R08: 0000000000000003 R09: 0000000000000000
|
||||
R10: 00007f964430cd0a R11: 0000000000000202 R12: 00007f96445cff88
|
||||
R13: 000055dc3ee51090 R14: 0000000000000000 R15: 0000000000000000
|
||||
|
||||
Allocated by task 2760:
|
||||
save_stack+0x43/0xd0
|
||||
kasan_kmalloc+0xa7/0xd0
|
||||
kmem_cache_alloc_trace+0xe1/0x1b0
|
||||
kmalloc_oob_right+0x56/0xbc [test_kasan]
|
||||
kmalloc_tests_init+0x16/0x700 [test_kasan]
|
||||
do_one_initcall+0xa5/0x3ae
|
||||
do_init_module+0x1b6/0x547
|
||||
load_module+0x75df/0x8070
|
||||
__do_sys_init_module+0x1c6/0x200
|
||||
__x64_sys_init_module+0x6e/0xb0
|
||||
do_syscall_64+0x9f/0x2c0
|
||||
entry_SYSCALL_64_after_hwframe+0x44/0xa9
|
||||
|
||||
Freed by task 815:
|
||||
save_stack+0x43/0xd0
|
||||
__kasan_slab_free+0x135/0x190
|
||||
kasan_slab_free+0xe/0x10
|
||||
kfree+0x93/0x1a0
|
||||
umh_complete+0x6a/0xa0
|
||||
call_usermodehelper_exec_async+0x4c3/0x640
|
||||
ret_from_fork+0x35/0x40
|
||||
|
||||
The buggy address belongs to the object at ffff8801f44ec300
|
||||
which belongs to the cache kmalloc-128 of size 128
|
||||
The buggy address is located 123 bytes inside of
|
||||
128-byte region [ffff8801f44ec300, ffff8801f44ec380)
|
||||
The buggy address belongs to the page:
|
||||
page:ffffea0007d13b00 count:1 mapcount:0 mapping:ffff8801f7001640 index:0x0
|
||||
flags: 0x200000000000100(slab)
|
||||
raw: 0200000000000100 ffffea0007d11dc0 0000001a0000001a ffff8801f7001640
|
||||
raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000
|
||||
page dumped because: kasan: bad access detected
|
||||
|
||||
Memory state around the buggy address:
|
||||
ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
|
||||
ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
|
||||
>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
|
||||
^
|
||||
ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|
||||
ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec200: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
>ffff8801f44ec300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03
|
||||
^
|
||||
ffff8801f44ec380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
|
||||
ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
|
||||
==================================================================
|
||||
|
||||
The header of the report discribe what kind of bug happened and what kind of
|
||||
access caused it. It's followed by the description of the accessed slub object
|
||||
(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and
|
||||
the description of the accessed memory page.
|
||||
The header of the report provides a short summary of what kind of bug happened
|
||||
and what kind of access caused it. It's followed by a stack trace of the bad
|
||||
access, a stack trace of where the accessed memory was allocated (in case bad
|
||||
access happens on a slab object), and a stack trace of where the object was
|
||||
freed (in case of a use-after-free bug report). Next comes a description of
|
||||
the accessed slab object and information about the accessed memory page.
|
||||
|
||||
In the last section the report shows memory state around the accessed address.
|
||||
Reading this part requires some understanding of how KASAN works.
|
||||
@@ -138,18 +150,24 @@ inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
|
||||
In the report above the arrows point to the shadow byte 03, which means that
|
||||
the accessed address is partially accessible.
|
||||
|
||||
For tag-based KASAN this last report section shows the memory tags around the
|
||||
accessed address (see Implementation details section).
|
||||
|
||||
|
||||
Implementation details
|
||||
----------------------
|
||||
|
||||
Generic KASAN
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
From a high level, our approach to memory error detection is similar to that
|
||||
of kmemcheck: use shadow memory to record whether each byte of memory is safe
|
||||
to access, and use compile-time instrumentation to check shadow memory on each
|
||||
memory access.
|
||||
to access, and use compile-time instrumentation to insert checks of shadow
|
||||
memory on each memory access.
|
||||
|
||||
AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
|
||||
(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
|
||||
offset to translate a memory address to its corresponding shadow address.
|
||||
Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB
|
||||
to cover 128TB on x86_64) and uses direct mapping with a scale and offset to
|
||||
translate a memory address to its corresponding shadow address.
|
||||
|
||||
Here is the function which translates an address to its corresponding shadow
|
||||
address::
|
||||
@@ -162,12 +180,38 @@ address::
|
||||
|
||||
where ``KASAN_SHADOW_SCALE_SHIFT = 3``.
|
||||
|
||||
Compile-time instrumentation used for checking memory accesses. Compiler inserts
|
||||
function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
|
||||
access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
|
||||
valid or not by checking corresponding shadow memory.
|
||||
Compile-time instrumentation is used to insert memory access checks. Compiler
|
||||
inserts function calls (__asan_load*(addr), __asan_store*(addr)) before each
|
||||
memory access of size 1, 2, 4, 8 or 16. These functions check whether memory
|
||||
access is valid or not by checking corresponding shadow memory.
|
||||
|
||||
GCC 5.0 has possibility to perform inline instrumentation. Instead of making
|
||||
function calls GCC directly inserts the code to check the shadow memory.
|
||||
This option significantly enlarges kernel but it gives x1.1-x2 performance
|
||||
boost over outline instrumented kernel.
|
||||
|
||||
Software tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Tag-based KASAN uses the Top Byte Ignore (TBI) feature of modern arm64 CPUs to
|
||||
store a pointer tag in the top byte of kernel pointers. Like generic KASAN it
|
||||
uses shadow memory to store memory tags associated with each 16-byte memory
|
||||
cell (therefore it dedicates 1/16th of the kernel memory for shadow memory).
|
||||
|
||||
On each memory allocation tag-based KASAN generates a random tag, tags the
|
||||
allocated memory with this tag, and embeds this tag into the returned pointer.
|
||||
Software tag-based KASAN uses compile-time instrumentation to insert checks
|
||||
before each memory access. These checks make sure that tag of the memory that
|
||||
is being accessed is equal to tag of the pointer that is used to access this
|
||||
memory. In case of a tag mismatch tag-based KASAN prints a bug report.
|
||||
|
||||
Software tag-based KASAN also has two instrumentation modes (outline, that
|
||||
emits callbacks to check memory accesses; and inline, that performs the shadow
|
||||
memory checks inline). With outline instrumentation mode, a bug report is
|
||||
simply printed from the function that performs the access check. With inline
|
||||
instrumentation a brk instruction is emitted by the compiler, and a dedicated
|
||||
brk handler is used to print bug reports.
|
||||
|
||||
A potential expansion of this mode is a hardware tag-based mode, which would
|
||||
use hardware memory tagging support instead of compiler instrumentation and
|
||||
manual shadow memory manipulation.
|
||||
|
||||
@@ -177,6 +177,7 @@ mkprep
|
||||
mkregtable
|
||||
mktables
|
||||
mktree
|
||||
mkutf8data
|
||||
modpost
|
||||
modules.builtin
|
||||
modules.order
|
||||
@@ -255,6 +256,7 @@ vsyscall_32.lds
|
||||
wanxlfw.inc
|
||||
uImage
|
||||
unifdef
|
||||
utf8data.h
|
||||
wakeup.bin
|
||||
wakeup.elf
|
||||
wakeup.lds
|
||||
|
||||
@@ -34,6 +34,6 @@
|
||||
| tile: | TODO |
|
||||
| um: | TODO |
|
||||
| unicore32: | TODO |
|
||||
| x86: | ok |
|
||||
| x86: | ok | 64-bit only
|
||||
| xtensa: | TODO |
|
||||
-----------------------
|
||||
|
||||
41
Documentation/filesystems/ext4/verity.rst
Normal file
41
Documentation/filesystems/ext4/verity.rst
Normal file
@@ -0,0 +1,41 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Verity files
|
||||
------------
|
||||
|
||||
ext4 supports fs-verity, which is a filesystem feature that provides
|
||||
Merkle tree based hashing for individual readonly files. Most of
|
||||
fs-verity is common to all filesystems that support it; see
|
||||
:ref:`Documentation/filesystems/fsverity.rst <fsverity>` for the
|
||||
fs-verity documentation. However, the on-disk layout of the verity
|
||||
metadata is filesystem-specific. On ext4, the verity metadata is
|
||||
stored after the end of the file data itself, in the following format:
|
||||
|
||||
- Zero-padding to the next 65536-byte boundary. This padding need not
|
||||
actually be allocated on-disk, i.e. it may be a hole.
|
||||
|
||||
- The Merkle tree, as documented in
|
||||
:ref:`Documentation/filesystems/fsverity.rst
|
||||
<fsverity_merkle_tree>`, with the tree levels stored in order from
|
||||
root to leaf, and the tree blocks within each level stored in their
|
||||
natural order.
|
||||
|
||||
- Zero-padding to the next filesystem block boundary.
|
||||
|
||||
- The verity descriptor, as documented in
|
||||
:ref:`Documentation/filesystems/fsverity.rst <fsverity_descriptor>`,
|
||||
with optionally appended signature blob.
|
||||
|
||||
- Zero-padding to the next offset that is 4 bytes before a filesystem
|
||||
block boundary.
|
||||
|
||||
- The size of the verity descriptor in bytes, as a 4-byte little
|
||||
endian integer.
|
||||
|
||||
Verity inodes have EXT4_VERITY_FL set, and they must use extents, i.e.
|
||||
EXT4_EXTENTS_FL must be set and EXT4_INLINE_DATA_FL must be clear.
|
||||
They can have EXT4_ENCRYPT_FL set, in which case the verity metadata
|
||||
is encrypted as well as the data itself.
|
||||
|
||||
Verity files cannot have blocks allocated past the end of the verity
|
||||
metadata.
|
||||
@@ -157,6 +157,11 @@ noinline_data Disable the inline data feature, inline data feature is
|
||||
enabled by default.
|
||||
data_flush Enable data flushing before checkpoint in order to
|
||||
persist data of regular and symlink.
|
||||
reserve_root=%d Support configuring reserved space which is used for
|
||||
allocation from a privileged user with specified uid or
|
||||
gid, unit: 4KB, the default limit is 0.2% of user blocks.
|
||||
resuid=%d The user ID which may use the reserved blocks.
|
||||
resgid=%d The group ID which may use the reserved blocks.
|
||||
fault_injection=%d Enable fault injection in all supported types with
|
||||
specified injection rate.
|
||||
fault_type=%d Support configuring fault injection type, should be
|
||||
@@ -214,11 +219,22 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
|
||||
non-atomic files likewise "nobarrier" mount option.
|
||||
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
|
||||
context. The fake fscrypt context is used by xfstests.
|
||||
checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable"
|
||||
checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
|
||||
to reenable checkpointing. Is enabled by default. While
|
||||
disabled, any unmounting or unexpected shutdowns will cause
|
||||
the filesystem contents to appear as they did when the
|
||||
filesystem was mounted with that option.
|
||||
While mounting with checkpoint=disabled, the filesystem must
|
||||
run garbage collection to ensure that all available space can
|
||||
be used. If this takes too much time, the mount may return
|
||||
EAGAIN. You may optionally add a value to indicate how much
|
||||
of the disk you would be willing to temporarily give up to
|
||||
avoid additional garbage collection. This can be given as a
|
||||
number of blocks, or as a percent. For instance, mounting
|
||||
with checkpoint=disable:100% would always succeed, but it may
|
||||
hide up to all remaining free space. The actual space that
|
||||
would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
|
||||
This space is reclaimed once checkpoint=enable.
|
||||
|
||||
================================================================================
|
||||
DEBUGFS ENTRIES
|
||||
@@ -246,11 +262,14 @@ Files in /sys/fs/f2fs/<devname>
|
||||
..............................................................................
|
||||
File Content
|
||||
|
||||
gc_max_sleep_time This tuning parameter controls the maximum sleep
|
||||
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
|
||||
500 ms is set by default. See above gc_urgent.
|
||||
|
||||
gc_min_sleep_time This tuning parameter controls the minimum sleep
|
||||
time for the garbage collection thread. Time is
|
||||
in milliseconds.
|
||||
|
||||
gc_min_sleep_time This tuning parameter controls the minimum sleep
|
||||
gc_max_sleep_time This tuning parameter controls the maximum sleep
|
||||
time for the garbage collection thread. Time is
|
||||
in milliseconds.
|
||||
|
||||
@@ -270,9 +289,6 @@ Files in /sys/fs/f2fs/<devname>
|
||||
to 1, background thread starts to do GC by given
|
||||
gc_urgent_sleep_time interval.
|
||||
|
||||
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
|
||||
500 ms is set by default. See above gc_urgent.
|
||||
|
||||
reclaim_segments This parameter controls the number of prefree
|
||||
segments to be reclaimed. If the number of prefree
|
||||
segments is larger than the number of segments
|
||||
@@ -287,7 +303,16 @@ Files in /sys/fs/f2fs/<devname>
|
||||
checkpoint is triggered, and issued during the
|
||||
checkpoint. By default, it is disabled with 0.
|
||||
|
||||
trim_sections This parameter controls the number of sections
|
||||
discard_granularity This parameter controls the granularity of discard
|
||||
command size. It will issue discard commands iif
|
||||
the size is larger than given granularity. Its
|
||||
unit size is 4KB, and 4 (=16KB) is set by default.
|
||||
The maximum value is 128 (=512KB).
|
||||
|
||||
reserved_blocks This parameter indicates the number of blocks that
|
||||
f2fs reserves internally for root.
|
||||
|
||||
batched_trim_sections This parameter controls the number of sections
|
||||
to be trimmed out in batch mode when FITRIM
|
||||
conducts. 32 sections is set by default.
|
||||
|
||||
@@ -309,11 +334,35 @@ Files in /sys/fs/f2fs/<devname>
|
||||
the number is less than this value, it triggers
|
||||
in-place-updates.
|
||||
|
||||
min_seq_blocks This parameter controls the threshold to serialize
|
||||
write IOs issued by multiple threads in parallel.
|
||||
|
||||
min_hot_blocks This parameter controls the threshold to allocate
|
||||
a hot data log for pending data blocks to write.
|
||||
|
||||
min_ssr_sections This parameter adds the threshold when deciding
|
||||
SSR block allocation. If this is large, SSR mode
|
||||
will be enabled early.
|
||||
|
||||
ram_thresh This parameter controls the memory footprint used
|
||||
by free nids and cached nat entries. By default,
|
||||
10 is set, which indicates 10 MB / 1 GB RAM.
|
||||
|
||||
ra_nid_pages When building free nids, F2FS reads NAT blocks
|
||||
ahead for speed up. Default is 0.
|
||||
|
||||
dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS
|
||||
determines flushing them in background.
|
||||
|
||||
max_victim_search This parameter controls the number of trials to
|
||||
find a victim segment when conducting SSR and
|
||||
cleaning operations. The default value is 4096
|
||||
which covers 8GB block address range.
|
||||
|
||||
migration_granularity For large-sized sections, F2FS can stop GC given
|
||||
this granularity instead of reclaiming entire
|
||||
section.
|
||||
|
||||
dir_level This parameter controls the directory level to
|
||||
support large directory. If a directory has a
|
||||
number of files, it can reduce the file lookup
|
||||
@@ -321,9 +370,56 @@ Files in /sys/fs/f2fs/<devname>
|
||||
Otherwise, it needs to decrease this value to
|
||||
reduce the space overhead. The default value is 0.
|
||||
|
||||
ram_thresh This parameter controls the memory footprint used
|
||||
by free nids and cached nat entries. By default,
|
||||
10 is set, which indicates 10 MB / 1 GB RAM.
|
||||
cp_interval F2FS tries to do checkpoint periodically, 60 secs
|
||||
by default.
|
||||
|
||||
idle_interval F2FS detects system is idle, if there's no F2FS
|
||||
operations during given interval, 5 secs by
|
||||
default.
|
||||
|
||||
discard_idle_interval F2FS detects the discard thread is idle, given
|
||||
time interval. Default is 5 secs.
|
||||
|
||||
gc_idle_interval F2FS detects the GC thread is idle, given time
|
||||
interval. Default is 5 secs.
|
||||
|
||||
umount_discard_timeout When unmounting the disk, F2FS waits for finishing
|
||||
queued discard commands which can take huge time.
|
||||
This gives time out for it, 5 secs by default.
|
||||
|
||||
iostat_enable This controls to enable/disable iostat in F2FS.
|
||||
|
||||
readdir_ra This enables/disabled readahead of inode blocks
|
||||
in readdir, and default is enabled.
|
||||
|
||||
gc_pin_file_thresh This indicates how many GC can be failed for the
|
||||
pinned file. If it exceeds this, F2FS doesn't
|
||||
guarantee its pinning state. 2048 trials is set
|
||||
by default.
|
||||
|
||||
extension_list This enables to change extension_list for hot/cold
|
||||
files in runtime.
|
||||
|
||||
inject_rate This controls injection rate of arbitrary faults.
|
||||
|
||||
inject_type This controls injection type of arbitrary faults.
|
||||
|
||||
dirty_segments This shows # of dirty segments.
|
||||
|
||||
lifetime_write_kbytes This shows # of data written to the disk.
|
||||
|
||||
features This shows current features enabled on F2FS.
|
||||
|
||||
current_reserved_blocks This shows # of blocks currently reserved.
|
||||
|
||||
unusable If checkpoint=disable, this shows the number of
|
||||
blocks that are unusable.
|
||||
If checkpoint=enable it shows the number of blocks
|
||||
that would be unusable if checkpoint=disable were
|
||||
to be set.
|
||||
|
||||
encoding This shows the encoding used for casefolding.
|
||||
If casefolding is not enabled, returns (none)
|
||||
|
||||
================================================================================
|
||||
USAGE
|
||||
@@ -656,3 +752,28 @@ algorithm.
|
||||
In order to identify whether the data in the victim segment are valid or not,
|
||||
F2FS manages a bitmap. Each bit represents the validity of a block, and the
|
||||
bitmap is composed of a bit stream covering whole blocks in main area.
|
||||
|
||||
Fallocate(2) Policy
|
||||
-------------------
|
||||
|
||||
The default policy follows the below posix rule.
|
||||
|
||||
Allocating disk space
|
||||
The default operation (i.e., mode is zero) of fallocate() allocates
|
||||
the disk space within the range specified by offset and len. The
|
||||
file size (as reported by stat(2)) will be changed if offset+len is
|
||||
greater than the file size. Any subregion within the range specified
|
||||
by offset and len that did not contain data before the call will be
|
||||
initialized to zero. This default behavior closely resembles the
|
||||
behavior of the posix_fallocate(3) library function, and is intended
|
||||
as a method of optimally implementing that function.
|
||||
|
||||
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
|
||||
fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having
|
||||
zero or random data, which is useful to the below scenario where:
|
||||
1. create(fd)
|
||||
2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
|
||||
3. fallocate(fd, 0, 0, size)
|
||||
4. address = fibmap(fd, offset)
|
||||
5. open(blkdev)
|
||||
6. write(blkdev, address)
|
||||
|
||||
@@ -72,6 +72,9 @@ Online attacks
|
||||
fscrypt (and storage encryption in general) can only provide limited
|
||||
protection, if any at all, against online attacks. In detail:
|
||||
|
||||
Side-channel attacks
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
fscrypt is only resistant to side-channel attacks, such as timing or
|
||||
electromagnetic attacks, to the extent that the underlying Linux
|
||||
Cryptographic API algorithms are. If a vulnerable algorithm is used,
|
||||
@@ -80,29 +83,90 @@ attacker to mount a side channel attack against the online system.
|
||||
Side channel attacks may also be mounted against applications
|
||||
consuming decrypted data.
|
||||
|
||||
After an encryption key has been provided, fscrypt is not designed to
|
||||
hide the plaintext file contents or filenames from other users on the
|
||||
same system, regardless of the visibility of the keyring key.
|
||||
Instead, existing access control mechanisms such as file mode bits,
|
||||
POSIX ACLs, LSMs, or mount namespaces should be used for this purpose.
|
||||
Also note that as long as the encryption keys are *anywhere* in
|
||||
memory, an online attacker can necessarily compromise them by mounting
|
||||
a physical attack or by exploiting any kernel security vulnerability
|
||||
which provides an arbitrary memory read primitive.
|
||||
Unauthorized file access
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
While it is ostensibly possible to "evict" keys from the system,
|
||||
recently accessed encrypted files will remain accessible at least
|
||||
until the filesystem is unmounted or the VFS caches are dropped, e.g.
|
||||
using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the
|
||||
RAM is compromised before being powered off, it will likely still be
|
||||
possible to recover portions of the plaintext file contents, if not
|
||||
some of the encryption keys as well. (Since Linux v4.12, all
|
||||
in-kernel keys related to fscrypt are sanitized before being freed.
|
||||
However, userspace would need to do its part as well.)
|
||||
After an encryption key has been added, fscrypt does not hide the
|
||||
plaintext file contents or filenames from other users on the same
|
||||
system. Instead, existing access control mechanisms such as file mode
|
||||
bits, POSIX ACLs, LSMs, or namespaces should be used for this purpose.
|
||||
|
||||
Currently, fscrypt does not prevent a user from maliciously providing
|
||||
an incorrect key for another user's existing encrypted files. A
|
||||
protection against this is planned.
|
||||
(For the reasoning behind this, understand that while the key is
|
||||
added, the confidentiality of the data, from the perspective of the
|
||||
system itself, is *not* protected by the mathematical properties of
|
||||
encryption but rather only by the correctness of the kernel.
|
||||
Therefore, any encryption-specific access control checks would merely
|
||||
be enforced by kernel *code* and therefore would be largely redundant
|
||||
with the wide variety of access control mechanisms already available.)
|
||||
|
||||
Kernel memory compromise
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
An attacker who compromises the system enough to read from arbitrary
|
||||
memory, e.g. by mounting a physical attack or by exploiting a kernel
|
||||
security vulnerability, can compromise all encryption keys that are
|
||||
currently in use.
|
||||
|
||||
However, fscrypt allows encryption keys to be removed from the kernel,
|
||||
which may protect them from later compromise.
|
||||
|
||||
In more detail, the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (or the
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl) can wipe a master
|
||||
encryption key from kernel memory. If it does so, it will also try to
|
||||
evict all cached inodes which had been "unlocked" using the key,
|
||||
thereby wiping their per-file keys and making them once again appear
|
||||
"locked", i.e. in ciphertext or encrypted form.
|
||||
|
||||
However, these ioctls have some limitations:
|
||||
|
||||
- Per-file keys for in-use files will *not* be removed or wiped.
|
||||
Therefore, for maximum effect, userspace should close the relevant
|
||||
encrypted files and directories before removing a master key, as
|
||||
well as kill any processes whose working directory is in an affected
|
||||
encrypted directory.
|
||||
|
||||
- The kernel cannot magically wipe copies of the master key(s) that
|
||||
userspace might have as well. Therefore, userspace must wipe all
|
||||
copies of the master key(s) it makes as well; normally this should
|
||||
be done immediately after FS_IOC_ADD_ENCRYPTION_KEY, without waiting
|
||||
for FS_IOC_REMOVE_ENCRYPTION_KEY. Naturally, the same also applies
|
||||
to all higher levels in the key hierarchy. Userspace should also
|
||||
follow other security precautions such as mlock()ing memory
|
||||
containing keys to prevent it from being swapped out.
|
||||
|
||||
- In general, decrypted contents and filenames in the kernel VFS
|
||||
caches are freed but not wiped. Therefore, portions thereof may be
|
||||
recoverable from freed memory, even after the corresponding key(s)
|
||||
were wiped. To partially solve this, you can set
|
||||
CONFIG_PAGE_POISONING=y in your kernel config and add page_poison=1
|
||||
to your kernel command line. However, this has a performance cost.
|
||||
|
||||
- Secret keys might still exist in CPU registers, in crypto
|
||||
accelerator hardware (if used by the crypto API to implement any of
|
||||
the algorithms), or in other places not explicitly considered here.
|
||||
|
||||
Limitations of v1 policies
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
v1 encryption policies have some weaknesses with respect to online
|
||||
attacks:
|
||||
|
||||
- There is no verification that the provided master key is correct.
|
||||
Therefore, a malicious user can temporarily associate the wrong key
|
||||
with another user's encrypted files to which they have read-only
|
||||
access. Because of filesystem caching, the wrong key will then be
|
||||
used by the other user's accesses to those files, even if the other
|
||||
user has the correct key in their own keyring. This violates the
|
||||
meaning of "read-only access".
|
||||
|
||||
- A compromise of a per-file key also compromises the master key from
|
||||
which it was derived.
|
||||
|
||||
- Non-root users cannot securely remove encryption keys.
|
||||
|
||||
All the above problems are fixed with v2 encryption policies. For
|
||||
this reason among others, it is recommended to use v2 encryption
|
||||
policies on all new encrypted directories.
|
||||
|
||||
Key hierarchy
|
||||
=============
|
||||
@@ -123,11 +187,52 @@ appropriate master key. There can be any number of master keys, each
|
||||
of which protects any number of directory trees on any number of
|
||||
filesystems.
|
||||
|
||||
Userspace should generate master keys either using a cryptographically
|
||||
secure random number generator, or by using a KDF (Key Derivation
|
||||
Function). Note that whenever a KDF is used to "stretch" a
|
||||
lower-entropy secret such as a passphrase, it is critical that a KDF
|
||||
designed for this purpose be used, such as scrypt, PBKDF2, or Argon2.
|
||||
Master keys must be real cryptographic keys, i.e. indistinguishable
|
||||
from random bytestrings of the same length. This implies that users
|
||||
**must not** directly use a password as a master key, zero-pad a
|
||||
shorter key, or repeat a shorter key. Security cannot be guaranteed
|
||||
if userspace makes any such error, as the cryptographic proofs and
|
||||
analysis would no longer apply.
|
||||
|
||||
Instead, users should generate master keys either using a
|
||||
cryptographically secure random number generator, or by using a KDF
|
||||
(Key Derivation Function). The kernel does not do any key stretching;
|
||||
therefore, if userspace derives the key from a low-entropy secret such
|
||||
as a passphrase, it is critical that a KDF designed for this purpose
|
||||
be used, such as scrypt, PBKDF2, or Argon2.
|
||||
|
||||
Key derivation function
|
||||
-----------------------
|
||||
|
||||
With one exception, fscrypt never uses the master key(s) for
|
||||
encryption directly. Instead, they are only used as input to a KDF
|
||||
(Key Derivation Function) to derive the actual keys.
|
||||
|
||||
The KDF used for a particular master key differs depending on whether
|
||||
the key is used for v1 encryption policies or for v2 encryption
|
||||
policies. Users **must not** use the same key for both v1 and v2
|
||||
encryption policies. (No real-world attack is currently known on this
|
||||
specific case of key reuse, but its security cannot be guaranteed
|
||||
since the cryptographic proofs and analysis would no longer apply.)
|
||||
|
||||
For v1 encryption policies, the KDF only supports deriving per-file
|
||||
encryption keys. It works by encrypting the master key with
|
||||
AES-128-ECB, using the file's 16-byte nonce as the AES key. The
|
||||
resulting ciphertext is used as the derived key. If the ciphertext is
|
||||
longer than needed, then it is truncated to the needed length.
|
||||
|
||||
For v2 encryption policies, the KDF is HKDF-SHA512. The master key is
|
||||
passed as the "input keying material", no salt is used, and a distinct
|
||||
"application-specific information string" is used for each distinct
|
||||
key to be derived. For example, when a per-file encryption key is
|
||||
derived, the application-specific information string is the file's
|
||||
nonce prefixed with "fscrypt\\0" and a context byte. Different
|
||||
context bytes are used for other types of derived keys.
|
||||
|
||||
HKDF-SHA512 is preferred to the original AES-128-ECB based KDF because
|
||||
HKDF is more flexible, is nonreversible, and evenly distributes
|
||||
entropy from the master key. HKDF is also standardized and widely
|
||||
used by other software, whereas the AES-128-ECB based KDF is ad-hoc.
|
||||
|
||||
Per-file keys
|
||||
-------------
|
||||
@@ -138,29 +243,9 @@ files doesn't map to the same ciphertext, or vice versa. In most
|
||||
cases, fscrypt does this by deriving per-file keys. When a new
|
||||
encrypted inode (regular file, directory, or symlink) is created,
|
||||
fscrypt randomly generates a 16-byte nonce and stores it in the
|
||||
inode's encryption xattr. Then, it uses a KDF (Key Derivation
|
||||
Function) to derive the file's key from the master key and nonce.
|
||||
|
||||
The Adiantum encryption mode (see `Encryption modes and usage`_) is
|
||||
special, since it accepts longer IVs and is suitable for both contents
|
||||
and filenames encryption. For it, a "direct key" option is offered
|
||||
where the file's nonce is included in the IVs and the master key is
|
||||
used for encryption directly. This improves performance; however,
|
||||
users must not use the same master key for any other encryption mode.
|
||||
|
||||
Below, the KDF and design considerations are described in more detail.
|
||||
|
||||
The current KDF works by encrypting the master key with AES-128-ECB,
|
||||
using the file's nonce as the AES key. The output is used as the
|
||||
derived key. If the output is longer than needed, then it is
|
||||
truncated to the needed length.
|
||||
|
||||
Note: this KDF meets the primary security requirement, which is to
|
||||
produce unique derived keys that preserve the entropy of the master
|
||||
key, assuming that the master key is already a good pseudorandom key.
|
||||
However, it is nonstandard and has some problems such as being
|
||||
reversible, so it is generally considered to be a mistake! It may be
|
||||
replaced with HKDF or another more standard KDF in the future.
|
||||
inode's encryption xattr. Then, it uses a KDF (as described in `Key
|
||||
derivation function`_) to derive the file's key from the master key
|
||||
and nonce.
|
||||
|
||||
Key derivation was chosen over key wrapping because wrapped keys would
|
||||
require larger xattrs which would be less likely to fit in-line in the
|
||||
@@ -176,6 +261,37 @@ rejected as it would have prevented ext4 filesystems from being
|
||||
resized, and by itself still wouldn't have been sufficient to prevent
|
||||
the same key from being directly reused for both XTS and CTS-CBC.
|
||||
|
||||
DIRECT_KEY and per-mode keys
|
||||
----------------------------
|
||||
|
||||
The Adiantum encryption mode (see `Encryption modes and usage`_) is
|
||||
suitable for both contents and filenames encryption, and it accepts
|
||||
long IVs --- long enough to hold both an 8-byte logical block number
|
||||
and a 16-byte per-file nonce. Also, the overhead of each Adiantum key
|
||||
is greater than that of an AES-256-XTS key.
|
||||
|
||||
Therefore, to improve performance and save memory, for Adiantum a
|
||||
"direct key" configuration is supported. When the user has enabled
|
||||
this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy,
|
||||
per-file keys are not used. Instead, whenever any data (contents or
|
||||
filenames) is encrypted, the file's 16-byte nonce is included in the
|
||||
IV. Moreover:
|
||||
|
||||
- For v1 encryption policies, the encryption is done directly with the
|
||||
master key. Because of this, users **must not** use the same master
|
||||
key for any other purpose, even for other v1 policies.
|
||||
|
||||
- For v2 encryption policies, the encryption is done with a per-mode
|
||||
key derived using the KDF. Users may use the same master key for
|
||||
other v2 encryption policies.
|
||||
|
||||
Key identifiers
|
||||
---------------
|
||||
|
||||
For master keys used for v2 encryption policies, a unique 16-byte "key
|
||||
identifier" is also derived using the KDF. This value is stored in
|
||||
the clear, since it is needed to reliably identify the key itself.
|
||||
|
||||
Encryption modes and usage
|
||||
==========================
|
||||
|
||||
@@ -191,7 +307,9 @@ Currently, the following pairs of encryption modes are supported:
|
||||
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
|
||||
|
||||
AES-128-CBC was added only for low-powered embedded devices with
|
||||
crypto accelerators such as CAAM or CESA that do not support XTS.
|
||||
crypto accelerators such as CAAM or CESA that do not support XTS. To
|
||||
use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
|
||||
implementation) must be enabled so that ESSIV can be used.
|
||||
|
||||
Adiantum is a (primarily) stream cipher-based mode that is fast even
|
||||
on CPUs without dedicated crypto instructions. It's also a true
|
||||
@@ -223,9 +341,10 @@ a little endian number, except that:
|
||||
is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
|
||||
of the file's data encryption key.
|
||||
|
||||
- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
|
||||
the fscrypt_policy), the file's nonce is also appended to the IV.
|
||||
Currently this is only allowed with the Adiantum encryption mode.
|
||||
- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
|
||||
set in the fscrypt_policy), the file's nonce is also appended to the
|
||||
IV. Currently this is only allowed with the Adiantum encryption
|
||||
mode.
|
||||
|
||||
Filenames encryption
|
||||
--------------------
|
||||
@@ -267,49 +386,77 @@ User API
|
||||
Setting an encryption policy
|
||||
----------------------------
|
||||
|
||||
FS_IOC_SET_ENCRYPTION_POLICY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
|
||||
empty directory or verifies that a directory or regular file already
|
||||
has the specified encryption policy. It takes in a pointer to a
|
||||
:c:type:`struct fscrypt_policy`, defined as follows::
|
||||
:c:type:`struct fscrypt_policy_v1` or a :c:type:`struct
|
||||
fscrypt_policy_v2`, defined as follows::
|
||||
|
||||
#define FS_KEY_DESCRIPTOR_SIZE 8
|
||||
|
||||
struct fscrypt_policy {
|
||||
#define FSCRYPT_POLICY_V1 0
|
||||
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
|
||||
struct fscrypt_policy_v1 {
|
||||
__u8 version;
|
||||
__u8 contents_encryption_mode;
|
||||
__u8 filenames_encryption_mode;
|
||||
__u8 flags;
|
||||
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
|
||||
__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
};
|
||||
#define fscrypt_policy fscrypt_policy_v1
|
||||
|
||||
#define FSCRYPT_POLICY_V2 2
|
||||
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
|
||||
struct fscrypt_policy_v2 {
|
||||
__u8 version;
|
||||
__u8 contents_encryption_mode;
|
||||
__u8 filenames_encryption_mode;
|
||||
__u8 flags;
|
||||
__u8 __reserved[4];
|
||||
__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
};
|
||||
|
||||
This structure must be initialized as follows:
|
||||
|
||||
- ``version`` must be 0.
|
||||
- ``version`` must be FSCRYPT_POLICY_V1 (0) if the struct is
|
||||
:c:type:`fscrypt_policy_v1` or FSCRYPT_POLICY_V2 (2) if the struct
|
||||
is :c:type:`fscrypt_policy_v2`. (Note: we refer to the original
|
||||
policy version as "v1", though its version code is really 0.) For
|
||||
new encrypted directories, use v2 policies.
|
||||
|
||||
- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
|
||||
be set to constants from ``<linux/fs.h>`` which identify the
|
||||
encryption modes to use. If unsure, use
|
||||
FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
|
||||
and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
|
||||
``filenames_encryption_mode``.
|
||||
be set to constants from ``<linux/fscrypt.h>`` which identify the
|
||||
encryption modes to use. If unsure, use FSCRYPT_MODE_AES_256_XTS
|
||||
(1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
|
||||
(4) for ``filenames_encryption_mode``.
|
||||
|
||||
- ``flags`` must contain a value from ``<linux/fs.h>`` which
|
||||
- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
|
||||
identifies the amount of NUL-padding to use when encrypting
|
||||
filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
|
||||
In addition, if the chosen encryption modes are both
|
||||
FS_ENCRYPTION_MODE_ADIANTUM, this can contain
|
||||
FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
|
||||
used directly, without key derivation.
|
||||
filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).
|
||||
Additionally, if the encryption modes are both
|
||||
FSCRYPT_MODE_ADIANTUM, this can contain
|
||||
FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_.
|
||||
|
||||
- ``master_key_descriptor`` specifies how to find the master key in
|
||||
the keyring; see `Adding keys`_. It is up to userspace to choose a
|
||||
unique ``master_key_descriptor`` for each master key. The e4crypt
|
||||
and fscrypt tools use the first 8 bytes of
|
||||
- For v2 encryption policies, ``__reserved`` must be zeroed.
|
||||
|
||||
- For v1 encryption policies, ``master_key_descriptor`` specifies how
|
||||
to find the master key in a keyring; see `Adding keys`_. It is up
|
||||
to userspace to choose a unique ``master_key_descriptor`` for each
|
||||
master key. The e4crypt and fscrypt tools use the first 8 bytes of
|
||||
``SHA-512(SHA-512(master_key))``, but this particular scheme is not
|
||||
required. Also, the master key need not be in the keyring yet when
|
||||
FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added
|
||||
before any files can be created in the encrypted directory.
|
||||
|
||||
For v2 encryption policies, ``master_key_descriptor`` has been
|
||||
replaced with ``master_key_identifier``, which is longer and cannot
|
||||
be arbitrarily chosen. Instead, the key must first be added using
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_. Then, the ``key_spec.u.identifier``
|
||||
the kernel returned in the :c:type:`struct fscrypt_add_key_arg` must
|
||||
be used as the ``master_key_identifier`` in the :c:type:`struct
|
||||
fscrypt_policy_v2`.
|
||||
|
||||
If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
|
||||
verifies that the file is an empty directory. If so, the specified
|
||||
encryption policy is assigned to the directory, turning it into an
|
||||
@@ -325,6 +472,15 @@ policy exactly matches the actual one. If they match, then the ioctl
|
||||
returns 0. Otherwise, it fails with EEXIST. This works on both
|
||||
regular files and directories, including nonempty directories.
|
||||
|
||||
When a v2 encryption policy is assigned to a directory, it is also
|
||||
required that either the specified key has been added by the current
|
||||
user or that the caller has CAP_FOWNER in the initial user namespace.
|
||||
(This is needed to prevent a user from encrypting their data with
|
||||
another user's key.) The key must remain added while
|
||||
FS_IOC_SET_ENCRYPTION_POLICY is executing. However, if the new
|
||||
encrypted directory does not need to be accessed immediately, then the
|
||||
key can be removed right away afterwards.
|
||||
|
||||
Note that the ext4 filesystem does not allow the root directory to be
|
||||
encrypted, even if it is empty. Users who want to encrypt an entire
|
||||
filesystem with one key should consider using dm-crypt instead.
|
||||
@@ -337,15 +493,19 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
- ``EEXIST``: the file is already encrypted with an encryption policy
|
||||
different from the one specified
|
||||
- ``EINVAL``: an invalid encryption policy was specified (invalid
|
||||
version, mode(s), or flags)
|
||||
version, mode(s), or flags; or reserved bits were set)
|
||||
- ``ENOKEY``: a v2 encryption policy was specified, but the key with
|
||||
the specified ``master_key_identifier`` has not been added, nor does
|
||||
the process have the CAP_FOWNER capability in the initial user
|
||||
namespace
|
||||
- ``ENOTDIR``: the file is unencrypted and is a regular file, not a
|
||||
directory
|
||||
- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
support for filesystems, or the filesystem superblock has not
|
||||
had encryption enabled on it. (For example, to use encryption on an
|
||||
ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the
|
||||
ext4 filesystem, CONFIG_FS_ENCRYPTION must be enabled in the
|
||||
kernel config, and the superblock must have had the "encrypt"
|
||||
feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O
|
||||
encrypt``.)
|
||||
@@ -356,25 +516,79 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
Getting an encryption policy
|
||||
----------------------------
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct
|
||||
fscrypt_policy`, if any, for a directory or regular file. See above
|
||||
for the struct definition. No additional permissions are required
|
||||
beyond the ability to open the file.
|
||||
Two ioctls are available to get a file's encryption policy:
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors:
|
||||
- `FS_IOC_GET_ENCRYPTION_POLICY_EX`_
|
||||
- `FS_IOC_GET_ENCRYPTION_POLICY`_
|
||||
|
||||
The extended (_EX) version of the ioctl is more general and is
|
||||
recommended to use when possible. However, on older kernels only the
|
||||
original ioctl is available. Applications should try the extended
|
||||
version, and if it fails with ENOTTY fall back to the original
|
||||
version.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY_EX
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY_EX ioctl retrieves the encryption
|
||||
policy, if any, for a directory or regular file. No additional
|
||||
permissions are required beyond the ability to open the file. It
|
||||
takes in a pointer to a :c:type:`struct fscrypt_get_policy_ex_arg`,
|
||||
defined as follows::
|
||||
|
||||
struct fscrypt_get_policy_ex_arg {
|
||||
__u64 policy_size; /* input/output */
|
||||
union {
|
||||
__u8 version;
|
||||
struct fscrypt_policy_v1 v1;
|
||||
struct fscrypt_policy_v2 v2;
|
||||
} policy; /* output */
|
||||
};
|
||||
|
||||
The caller must initialize ``policy_size`` to the size available for
|
||||
the policy struct, i.e. ``sizeof(arg.policy)``.
|
||||
|
||||
On success, the policy struct is returned in ``policy``, and its
|
||||
actual size is returned in ``policy_size``. ``policy.version`` should
|
||||
be checked to determine the version of policy returned. Note that the
|
||||
version code for the "v1" policy is actually 0 (FSCRYPT_POLICY_V1).
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY_EX can fail with the following errors:
|
||||
|
||||
- ``EINVAL``: the file is encrypted, but it uses an unrecognized
|
||||
encryption context format
|
||||
encryption policy version
|
||||
- ``ENODATA``: the file is not encrypted
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption,
|
||||
or this kernel is too old to support FS_IOC_GET_ENCRYPTION_POLICY_EX
|
||||
(try FS_IOC_GET_ENCRYPTION_POLICY instead)
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
- ``EOVERFLOW``: the file is encrypted and uses a recognized
|
||||
encryption policy version, but the policy struct does not fit into
|
||||
the provided buffer
|
||||
|
||||
Note: if you only need to know whether a file is encrypted or not, on
|
||||
most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl
|
||||
and check for FS_ENCRYPT_FL, or to use the statx() system call and
|
||||
check for STATX_ATTR_ENCRYPTED in stx_attributes.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_POLICY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_POLICY ioctl can also retrieve the
|
||||
encryption policy, if any, for a directory or regular file. However,
|
||||
unlike `FS_IOC_GET_ENCRYPTION_POLICY_EX`_,
|
||||
FS_IOC_GET_ENCRYPTION_POLICY only supports the original policy
|
||||
version. It takes in a pointer directly to a :c:type:`struct
|
||||
fscrypt_policy_v1` rather than a :c:type:`struct
|
||||
fscrypt_get_policy_ex_arg`.
|
||||
|
||||
The error codes for FS_IOC_GET_ENCRYPTION_POLICY are the same as those
|
||||
for FS_IOC_GET_ENCRYPTION_POLICY_EX, except that
|
||||
FS_IOC_GET_ENCRYPTION_POLICY also returns ``EINVAL`` if the file is
|
||||
encrypted using a newer encryption policy version.
|
||||
|
||||
Getting the per-filesystem salt
|
||||
-------------------------------
|
||||
|
||||
@@ -390,8 +604,115 @@ generate and manage any needed salt(s) in userspace.
|
||||
Adding keys
|
||||
-----------
|
||||
|
||||
To provide a master key, userspace must add it to an appropriate
|
||||
keyring using the add_key() system call (see:
|
||||
FS_IOC_ADD_ENCRYPTION_KEY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_ADD_ENCRYPTION_KEY ioctl adds a master encryption key to
|
||||
the filesystem, making all files on the filesystem which were
|
||||
encrypted using that key appear "unlocked", i.e. in plaintext form.
|
||||
It can be executed on any file or directory on the target filesystem,
|
||||
but using the filesystem's root directory is recommended. It takes in
|
||||
a pointer to a :c:type:`struct fscrypt_add_key_arg`, defined as
|
||||
follows::
|
||||
|
||||
struct fscrypt_add_key_arg {
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
__u32 raw_size;
|
||||
__u32 __reserved[9];
|
||||
__u8 raw[];
|
||||
};
|
||||
|
||||
#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1
|
||||
#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2
|
||||
|
||||
struct fscrypt_key_specifier {
|
||||
__u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */
|
||||
__u32 __reserved;
|
||||
union {
|
||||
__u8 __reserved[32]; /* reserve some extra space */
|
||||
__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
__u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
} u;
|
||||
};
|
||||
|
||||
:c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized
|
||||
as follows:
|
||||
|
||||
- If the key is being added for use by v1 encryption policies, then
|
||||
``key_spec.type`` must contain FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR, and
|
||||
``key_spec.u.descriptor`` must contain the descriptor of the key
|
||||
being added, corresponding to the value in the
|
||||
``master_key_descriptor`` field of :c:type:`struct
|
||||
fscrypt_policy_v1`. To add this type of key, the calling process
|
||||
must have the CAP_SYS_ADMIN capability in the initial user
|
||||
namespace.
|
||||
|
||||
Alternatively, if the key is being added for use by v2 encryption
|
||||
policies, then ``key_spec.type`` must contain
|
||||
FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, and ``key_spec.u.identifier`` is
|
||||
an *output* field which the kernel fills in with a cryptographic
|
||||
hash of the key. To add this type of key, the calling process does
|
||||
not need any privileges. However, the number of keys that can be
|
||||
added is limited by the user's quota for the keyrings service (see
|
||||
``Documentation/security/keys/core.rst``).
|
||||
|
||||
- ``raw_size`` must be the size of the ``raw`` key provided, in bytes.
|
||||
|
||||
- ``raw`` is a variable-length field which must contain the actual
|
||||
key, ``raw_size`` bytes long.
|
||||
|
||||
For v2 policy keys, the kernel keeps track of which user (identified
|
||||
by effective user ID) added the key, and only allows the key to be
|
||||
removed by that user --- or by "root", if they use
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_.
|
||||
|
||||
However, if another user has added the key, it may be desirable to
|
||||
prevent that other user from unexpectedly removing it. Therefore,
|
||||
FS_IOC_ADD_ENCRYPTION_KEY may also be used to add a v2 policy key
|
||||
*again*, even if it's already added by other user(s). In this case,
|
||||
FS_IOC_ADD_ENCRYPTION_KEY will just install a claim to the key for the
|
||||
current user, rather than actually add the key again (but the raw key
|
||||
must still be provided, as a proof of knowledge).
|
||||
|
||||
FS_IOC_ADD_ENCRYPTION_KEY returns 0 if either the key or a claim to
|
||||
the key was either added or already exists.
|
||||
|
||||
FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the
|
||||
caller does not have the CAP_SYS_ADMIN capability in the initial
|
||||
user namespace
|
||||
- ``EDQUOT``: the key quota for this user would be exceeded by adding
|
||||
the key
|
||||
- ``EINVAL``: invalid key size or key specifier type, or reserved bits
|
||||
were set
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
Legacy method
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
For v1 encryption policies, a master encryption key can also be
|
||||
provided by adding it to a process-subscribed keyring, e.g. to a
|
||||
session keyring, or to a user keyring if the user keyring is linked
|
||||
into the session keyring.
|
||||
|
||||
This method is deprecated (and not supported for v2 encryption
|
||||
policies) for several reasons. First, it cannot be used in
|
||||
combination with FS_IOC_REMOVE_ENCRYPTION_KEY (see `Removing keys`_),
|
||||
so for removing a key a workaround such as keyctl_unlink() in
|
||||
combination with ``sync; echo 2 > /proc/sys/vm/drop_caches`` would
|
||||
have to be used. Second, it doesn't match the fact that the
|
||||
locked/unlocked status of encrypted files (i.e. whether they appear to
|
||||
be in plaintext form or in ciphertext form) is global. This mismatch
|
||||
has caused much confusion as well as real problems when processes
|
||||
running under different UIDs, such as a ``sudo`` command, need to
|
||||
access encrypted files.
|
||||
|
||||
Nevertheless, to add a key to one of the process-subscribed keyrings,
|
||||
the add_key() system call can be used (see:
|
||||
``Documentation/security/keys/core.rst``). The key type must be
|
||||
"logon"; keys of this type are kept in kernel memory and cannot be
|
||||
read back by userspace. The key description must be "fscrypt:"
|
||||
@@ -399,12 +720,12 @@ followed by the 16-character lower case hex representation of the
|
||||
``master_key_descriptor`` that was set in the encryption policy. The
|
||||
key payload must conform to the following structure::
|
||||
|
||||
#define FS_MAX_KEY_SIZE 64
|
||||
#define FSCRYPT_MAX_KEY_SIZE 64
|
||||
|
||||
struct fscrypt_key {
|
||||
u32 mode;
|
||||
u8 raw[FS_MAX_KEY_SIZE];
|
||||
u32 size;
|
||||
__u32 mode;
|
||||
__u8 raw[FSCRYPT_MAX_KEY_SIZE];
|
||||
__u32 size;
|
||||
};
|
||||
|
||||
``mode`` is ignored; just set it to 0. The actual key is provided in
|
||||
@@ -416,26 +737,194 @@ with a filesystem-specific prefix such as "ext4:". However, the
|
||||
filesystem-specific prefixes are deprecated and should not be used in
|
||||
new programs.
|
||||
|
||||
There are several different types of keyrings in which encryption keys
|
||||
may be placed, such as a session keyring, a user session keyring, or a
|
||||
user keyring. Each key must be placed in a keyring that is "attached"
|
||||
to all processes that might need to access files encrypted with it, in
|
||||
the sense that request_key() will find the key. Generally, if only
|
||||
processes belonging to a specific user need to access a given
|
||||
encrypted directory and no session keyring has been installed, then
|
||||
that directory's key should be placed in that user's user session
|
||||
keyring or user keyring. Otherwise, a session keyring should be
|
||||
installed if needed, and the key should be linked into that session
|
||||
keyring, or in a keyring linked into that session keyring.
|
||||
Removing keys
|
||||
-------------
|
||||
|
||||
Note: introducing the complex visibility semantics of keyrings here
|
||||
was arguably a mistake --- especially given that by design, after any
|
||||
process successfully opens an encrypted file (thereby setting up the
|
||||
per-file key), possessing the keyring key is not actually required for
|
||||
any process to read/write the file until its in-memory inode is
|
||||
evicted. In the future there probably should be a way to provide keys
|
||||
directly to the filesystem instead, which would make the intended
|
||||
semantics clearer.
|
||||
Two ioctls are available for removing a key that was added by
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_:
|
||||
|
||||
- `FS_IOC_REMOVE_ENCRYPTION_KEY`_
|
||||
- `FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS`_
|
||||
|
||||
These two ioctls differ only in cases where v2 policy keys are added
|
||||
or removed by non-root users.
|
||||
|
||||
These ioctls don't work on keys that were added via the legacy
|
||||
process-subscribed keyrings mechanism.
|
||||
|
||||
Before using these ioctls, read the `Kernel memory compromise`_
|
||||
section for a discussion of the security goals and limitations of
|
||||
these ioctls.
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_REMOVE_ENCRYPTION_KEY ioctl removes a claim to a master
|
||||
encryption key from the filesystem, and possibly removes the key
|
||||
itself. It can be executed on any file or directory on the target
|
||||
filesystem, but using the filesystem's root directory is recommended.
|
||||
It takes in a pointer to a :c:type:`struct fscrypt_remove_key_arg`,
|
||||
defined as follows::
|
||||
|
||||
struct fscrypt_remove_key_arg {
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001
|
||||
#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002
|
||||
__u32 removal_status_flags; /* output */
|
||||
__u32 __reserved[5];
|
||||
};
|
||||
|
||||
This structure must be zeroed, then initialized as follows:
|
||||
|
||||
- The key to remove is specified by ``key_spec``:
|
||||
|
||||
- To remove a key used by v1 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
|
||||
in ``key_spec.u.descriptor``. To remove this type of key, the
|
||||
calling process must have the CAP_SYS_ADMIN capability in the
|
||||
initial user namespace.
|
||||
|
||||
- To remove a key used by v2 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
|
||||
in ``key_spec.u.identifier``.
|
||||
|
||||
For v2 policy keys, this ioctl is usable by non-root users. However,
|
||||
to make this possible, it actually just removes the current user's
|
||||
claim to the key, undoing a single call to FS_IOC_ADD_ENCRYPTION_KEY.
|
||||
Only after all claims are removed is the key really removed.
|
||||
|
||||
For example, if FS_IOC_ADD_ENCRYPTION_KEY was called with uid 1000,
|
||||
then the key will be "claimed" by uid 1000, and
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY will only succeed as uid 1000. Or, if
|
||||
both uids 1000 and 2000 added the key, then for each uid
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY will only remove their own claim. Only
|
||||
once *both* are removed is the key really removed. (Think of it like
|
||||
unlinking a file that may have hard links.)
|
||||
|
||||
If FS_IOC_REMOVE_ENCRYPTION_KEY really removes the key, it will also
|
||||
try to "lock" all files that had been unlocked with the key. It won't
|
||||
lock files that are still in-use, so this ioctl is expected to be used
|
||||
in cooperation with userspace ensuring that none of the files are
|
||||
still open. However, if necessary, this ioctl can be executed again
|
||||
later to retry locking any remaining files.
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY returns 0 if either the key was removed
|
||||
(but may still have files remaining to be locked), the user's claim to
|
||||
the key was removed, or the key was already removed but had files
|
||||
remaining to be the locked so the ioctl retried locking them. In any
|
||||
of these cases, ``removal_status_flags`` is filled in with the
|
||||
following informational status flags:
|
||||
|
||||
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY``: set if some file(s)
|
||||
are still in-use. Not guaranteed to be set in the case where only
|
||||
the user's claim to the key was removed.
|
||||
- ``FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS``: set if only the
|
||||
user's claim to the key was removed, not the key itself
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: The FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR key specifier type
|
||||
was specified, but the caller does not have the CAP_SYS_ADMIN
|
||||
capability in the initial user namespace
|
||||
- ``EINVAL``: invalid key specifier type, or reserved bits were set
|
||||
- ``ENOKEY``: the key object was not found at all, i.e. it was never
|
||||
added in the first place or was already fully removed including all
|
||||
files locked; or, the user does not have a claim to the key (but
|
||||
someone else does).
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS is exactly the same as
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY`_, except that for v2 policy keys, the
|
||||
ALL_USERS version of the ioctl will remove all users' claims to the
|
||||
key, not just the current user's. I.e., the key itself will always be
|
||||
removed, no matter how many users have added it. This difference is
|
||||
only meaningful if non-root users are adding and removing keys.
|
||||
|
||||
Because of this, FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS also requires
|
||||
"root", namely the CAP_SYS_ADMIN capability in the initial user
|
||||
namespace. Otherwise it will fail with EACCES.
|
||||
|
||||
Getting key status
|
||||
------------------
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl retrieves the status of a
|
||||
master encryption key. It can be executed on any file or directory on
|
||||
the target filesystem, but using the filesystem's root directory is
|
||||
recommended. It takes in a pointer to a :c:type:`struct
|
||||
fscrypt_get_key_status_arg`, defined as follows::
|
||||
|
||||
struct fscrypt_get_key_status_arg {
|
||||
/* input */
|
||||
struct fscrypt_key_specifier key_spec;
|
||||
__u32 __reserved[6];
|
||||
|
||||
/* output */
|
||||
#define FSCRYPT_KEY_STATUS_ABSENT 1
|
||||
#define FSCRYPT_KEY_STATUS_PRESENT 2
|
||||
#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3
|
||||
__u32 status;
|
||||
#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001
|
||||
__u32 status_flags;
|
||||
__u32 user_count;
|
||||
__u32 __out_reserved[13];
|
||||
};
|
||||
|
||||
The caller must zero all input fields, then fill in ``key_spec``:
|
||||
|
||||
- To get the status of a key for v1 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR and fill
|
||||
in ``key_spec.u.descriptor``.
|
||||
|
||||
- To get the status of a key for v2 encryption policies, set
|
||||
``key_spec.type`` to FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER and fill
|
||||
in ``key_spec.u.identifier``.
|
||||
|
||||
On success, 0 is returned and the kernel fills in the output fields:
|
||||
|
||||
- ``status`` indicates whether the key is absent, present, or
|
||||
incompletely removed. Incompletely removed means that the master
|
||||
secret has been removed, but some files are still in use; i.e.,
|
||||
`FS_IOC_REMOVE_ENCRYPTION_KEY`_ returned 0 but set the informational
|
||||
status flag FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY.
|
||||
|
||||
- ``status_flags`` can contain the following flags:
|
||||
|
||||
- ``FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF`` indicates that the key
|
||||
has added by the current user. This is only set for keys
|
||||
identified by ``identifier`` rather than by ``descriptor``.
|
||||
|
||||
- ``user_count`` specifies the number of users who have added the key.
|
||||
This is only set for keys identified by ``identifier`` rather than
|
||||
by ``descriptor``.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS can fail with the following errors:
|
||||
|
||||
- ``EINVAL``: invalid key specifier type, or reserved bits were set
|
||||
- ``ENOTTY``: this type of filesystem does not implement encryption
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with encryption
|
||||
support for this filesystem, or the filesystem superblock has not
|
||||
had encryption enabled on it
|
||||
|
||||
Among other use cases, FS_IOC_GET_ENCRYPTION_KEY_STATUS can be useful
|
||||
for determining whether the key for a given encrypted directory needs
|
||||
to be added before prompting the user for the passphrase needed to
|
||||
derive the key.
|
||||
|
||||
FS_IOC_GET_ENCRYPTION_KEY_STATUS can only get the status of keys in
|
||||
the filesystem-level keyring, i.e. the keyring managed by
|
||||
`FS_IOC_ADD_ENCRYPTION_KEY`_ and `FS_IOC_REMOVE_ENCRYPTION_KEY`_. It
|
||||
cannot get the status of a key that has only been added for use by v1
|
||||
encryption policies using the legacy mechanism involving
|
||||
process-subscribed keyrings.
|
||||
|
||||
Access semantics
|
||||
================
|
||||
@@ -451,10 +940,18 @@ astute users may notice some differences in behavior:
|
||||
- Unencrypted files, or files encrypted with a different encryption
|
||||
policy (i.e. different key, modes, or flags), cannot be renamed or
|
||||
linked into an encrypted directory; see `Encryption policy
|
||||
enforcement`_. Attempts to do so will fail with EPERM. However,
|
||||
enforcement`_. Attempts to do so will fail with EXDEV. However,
|
||||
encrypted files can be renamed within an encrypted directory, or
|
||||
into an unencrypted directory.
|
||||
|
||||
Note: "moving" an unencrypted file into an encrypted directory, e.g.
|
||||
with the `mv` program, is implemented in userspace by a copy
|
||||
followed by a delete. Be aware that the original unencrypted data
|
||||
may remain recoverable from free space on the disk; prefer to keep
|
||||
all files encrypted from the very beginning. The `shred` program
|
||||
may be used to overwrite the source files but isn't guaranteed to be
|
||||
effective on all filesystems and storage devices.
|
||||
|
||||
- Direct I/O is not supported on encrypted files. Attempts to use
|
||||
direct I/O on such files will fall back to buffered I/O.
|
||||
|
||||
@@ -490,7 +987,7 @@ Without the key
|
||||
|
||||
Some filesystem operations may be performed on encrypted regular
|
||||
files, directories, and symlinks even before their encryption key has
|
||||
been provided:
|
||||
been added, or after their encryption key has been removed:
|
||||
|
||||
- File metadata may be read, e.g. using stat().
|
||||
|
||||
@@ -541,7 +1038,7 @@ not be encrypted.
|
||||
Except for those special files, it is forbidden to have unencrypted
|
||||
files, or files encrypted with a different encryption policy, in an
|
||||
encrypted directory tree. Attempts to link or rename such a file into
|
||||
an encrypted directory will fail with EPERM. This is also enforced
|
||||
an encrypted directory will fail with EXDEV. This is also enforced
|
||||
during ->lookup() to provide limited protection against offline
|
||||
attacks that try to disable or downgrade encryption in known locations
|
||||
where applications may later write sensitive data. It is recommended
|
||||
@@ -555,33 +1052,44 @@ Encryption context
|
||||
------------------
|
||||
|
||||
An encryption policy is represented on-disk by a :c:type:`struct
|
||||
fscrypt_context`. It is up to individual filesystems to decide where
|
||||
to store it, but normally it would be stored in a hidden extended
|
||||
attribute. It should *not* be exposed by the xattr-related system
|
||||
calls such as getxattr() and setxattr() because of the special
|
||||
semantics of the encryption xattr. (In particular, there would be
|
||||
much confusion if an encryption policy were to be added to or removed
|
||||
from anything other than an empty directory.) The struct is defined
|
||||
as follows::
|
||||
fscrypt_context_v1` or a :c:type:`struct fscrypt_context_v2`. It is
|
||||
up to individual filesystems to decide where to store it, but normally
|
||||
it would be stored in a hidden extended attribute. It should *not* be
|
||||
exposed by the xattr-related system calls such as getxattr() and
|
||||
setxattr() because of the special semantics of the encryption xattr.
|
||||
(In particular, there would be much confusion if an encryption policy
|
||||
were to be added to or removed from anything other than an empty
|
||||
directory.) These structs are defined as follows::
|
||||
|
||||
#define FS_KEY_DESCRIPTOR_SIZE 8
|
||||
#define FS_KEY_DERIVATION_NONCE_SIZE 16
|
||||
|
||||
struct fscrypt_context {
|
||||
u8 format;
|
||||
#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
|
||||
struct fscrypt_context_v1 {
|
||||
u8 version;
|
||||
u8 contents_encryption_mode;
|
||||
u8 filenames_encryption_mode;
|
||||
u8 flags;
|
||||
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
|
||||
u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
|
||||
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
|
||||
};
|
||||
|
||||
Note that :c:type:`struct fscrypt_context` contains the same
|
||||
information as :c:type:`struct fscrypt_policy` (see `Setting an
|
||||
encryption policy`_), except that :c:type:`struct fscrypt_context`
|
||||
also contains a nonce. The nonce is randomly generated by the kernel
|
||||
and is used to derive the inode's encryption key as described in
|
||||
`Per-file keys`_.
|
||||
#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
|
||||
struct fscrypt_context_v2 {
|
||||
u8 version;
|
||||
u8 contents_encryption_mode;
|
||||
u8 filenames_encryption_mode;
|
||||
u8 flags;
|
||||
u8 __reserved[4];
|
||||
u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
|
||||
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
|
||||
};
|
||||
|
||||
The context structs contain the same information as the corresponding
|
||||
policy structs (see `Setting an encryption policy`_), except that the
|
||||
context structs also contain a nonce. The nonce is randomly generated
|
||||
by the kernel and is used as KDF input or as a tweak to cause
|
||||
different files to be encrypted differently; see `Per-file keys`_ and
|
||||
`DIRECT_KEY and per-mode keys`_.
|
||||
|
||||
Data path changes
|
||||
-----------------
|
||||
@@ -639,3 +1147,42 @@ Note that the precise way that filenames are presented to userspace
|
||||
without the key is subject to change in the future. It is only meant
|
||||
as a way to temporarily present valid filenames so that commands like
|
||||
``rm -r`` work as expected on encrypted directories.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
To test fscrypt, use xfstests, which is Linux's de facto standard
|
||||
filesystem test suite. First, run all the tests in the "encrypt"
|
||||
group on the relevant filesystem(s). For example, to test ext4 and
|
||||
f2fs encryption using `kvm-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
|
||||
|
||||
kvm-xfstests -c ext4,f2fs -g encrypt
|
||||
|
||||
UBIFS encryption can also be tested this way, but it should be done in
|
||||
a separate command, and it takes some time for kvm-xfstests to set up
|
||||
emulated UBI volumes::
|
||||
|
||||
kvm-xfstests -c ubifs -g encrypt
|
||||
|
||||
No tests should fail. However, tests that use non-default encryption
|
||||
modes (e.g. generic/549 and generic/550) will be skipped if the needed
|
||||
algorithms were not built into the kernel's crypto API. Also, tests
|
||||
that access the raw block device (e.g. generic/399, generic/548,
|
||||
generic/549, generic/550) will be skipped on UBIFS.
|
||||
|
||||
Besides running the "encrypt" group tests, for ext4 and f2fs it's also
|
||||
possible to run most xfstests with the "test_dummy_encryption" mount
|
||||
option. This option causes all new files to be automatically
|
||||
encrypted with a dummy key, without having to make any API calls.
|
||||
This tests the encrypted I/O paths more thoroughly. To do this with
|
||||
kvm-xfstests, use the "encrypt" filesystem configuration::
|
||||
|
||||
kvm-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
|
||||
|
||||
Because this runs many more tests than "-g encrypt" does, it takes
|
||||
much longer to run; so also consider using `gce-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/gce-xfstests.md>`_
|
||||
instead of kvm-xfstests::
|
||||
|
||||
gce-xfstests -c ext4/encrypt,f2fs/encrypt -g auto
|
||||
|
||||
726
Documentation/filesystems/fsverity.rst
Normal file
726
Documentation/filesystems/fsverity.rst
Normal file
@@ -0,0 +1,726 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
.. _fsverity:
|
||||
|
||||
=======================================================
|
||||
fs-verity: read-only file-based authenticity protection
|
||||
=======================================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
fs-verity (``fs/verity/``) is a support layer that filesystems can
|
||||
hook into to support transparent integrity and authenticity protection
|
||||
of read-only files. Currently, it is supported by the ext4 and f2fs
|
||||
filesystems. Like fscrypt, not too much filesystem-specific code is
|
||||
needed to support fs-verity.
|
||||
|
||||
fs-verity is similar to `dm-verity
|
||||
<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
|
||||
but works on files rather than block devices. On regular files on
|
||||
filesystems supporting fs-verity, userspace can execute an ioctl that
|
||||
causes the filesystem to build a Merkle tree for the file and persist
|
||||
it to a filesystem-specific location associated with the file.
|
||||
|
||||
After this, the file is made readonly, and all reads from the file are
|
||||
automatically verified against the file's Merkle tree. Reads of any
|
||||
corrupted data, including mmap reads, will fail.
|
||||
|
||||
Userspace can use another ioctl to retrieve the root hash (actually
|
||||
the "file measurement", which is a hash that includes the root hash)
|
||||
that fs-verity is enforcing for the file. This ioctl executes in
|
||||
constant time, regardless of the file size.
|
||||
|
||||
fs-verity is essentially a way to hash a file in constant time,
|
||||
subject to the caveat that reads which would violate the hash will
|
||||
fail at runtime.
|
||||
|
||||
Use cases
|
||||
=========
|
||||
|
||||
By itself, the base fs-verity feature only provides integrity
|
||||
protection, i.e. detection of accidental (non-malicious) corruption.
|
||||
|
||||
However, because fs-verity makes retrieving the file hash extremely
|
||||
efficient, it's primarily meant to be used as a tool to support
|
||||
authentication (detection of malicious modifications) or auditing
|
||||
(logging file hashes before use).
|
||||
|
||||
Trusted userspace code (e.g. operating system code running on a
|
||||
read-only partition that is itself authenticated by dm-verity) can
|
||||
authenticate the contents of an fs-verity file by using the
|
||||
`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
|
||||
digital signature of it.
|
||||
|
||||
A standard file hash could be used instead of fs-verity. However,
|
||||
this is inefficient if the file is large and only a small portion may
|
||||
be accessed. This is often the case for Android application package
|
||||
(APK) files, for example. These typically contain many translations,
|
||||
classes, and other resources that are infrequently or even never
|
||||
accessed on a particular device. It would be slow and wasteful to
|
||||
read and hash the entire file before starting the application.
|
||||
|
||||
Unlike an ahead-of-time hash, fs-verity also re-verifies data each
|
||||
time it's paged in. This ensures that malicious disk firmware can't
|
||||
undetectably change the contents of the file at runtime.
|
||||
|
||||
fs-verity does not replace or obsolete dm-verity. dm-verity should
|
||||
still be used on read-only filesystems. fs-verity is for files that
|
||||
must live on a read-write filesystem because they are independently
|
||||
updated and potentially user-installed, so dm-verity cannot be used.
|
||||
|
||||
The base fs-verity feature is a hashing mechanism only; actually
|
||||
authenticating the files is up to userspace. However, to meet some
|
||||
users' needs, fs-verity optionally supports a simple signature
|
||||
verification mechanism where users can configure the kernel to require
|
||||
that all fs-verity files be signed by a key loaded into a keyring; see
|
||||
`Built-in signature verification`_. Support for fs-verity file hashes
|
||||
in IMA (Integrity Measurement Architecture) policies is also planned.
|
||||
|
||||
User API
|
||||
========
|
||||
|
||||
FS_IOC_ENABLE_VERITY
|
||||
--------------------
|
||||
|
||||
The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file. It takes
|
||||
in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
|
||||
follows::
|
||||
|
||||
struct fsverity_enable_arg {
|
||||
__u32 version;
|
||||
__u32 hash_algorithm;
|
||||
__u32 block_size;
|
||||
__u32 salt_size;
|
||||
__u64 salt_ptr;
|
||||
__u32 sig_size;
|
||||
__u32 __reserved1;
|
||||
__u64 sig_ptr;
|
||||
__u64 __reserved2[11];
|
||||
};
|
||||
|
||||
This structure contains the parameters of the Merkle tree to build for
|
||||
the file, and optionally contains a signature. It must be initialized
|
||||
as follows:
|
||||
|
||||
- ``version`` must be 1.
|
||||
- ``hash_algorithm`` must be the identifier for the hash algorithm to
|
||||
use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256. See
|
||||
``include/uapi/linux/fsverity.h`` for the list of possible values.
|
||||
- ``block_size`` must be the Merkle tree block size. Currently, this
|
||||
must be equal to the system page size, which is usually 4096 bytes.
|
||||
Other sizes may be supported in the future. This value is not
|
||||
necessarily the same as the filesystem block size.
|
||||
- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
|
||||
provided. The salt is a value that is prepended to every hashed
|
||||
block; it can be used to personalize the hashing for a particular
|
||||
file or device. Currently the maximum salt size is 32 bytes.
|
||||
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
|
||||
provided.
|
||||
- ``sig_size`` is the size of the signature in bytes, or 0 if no
|
||||
signature is provided. Currently the signature is (somewhat
|
||||
arbitrarily) limited to 16128 bytes. See `Built-in signature
|
||||
verification`_ for more information.
|
||||
- ``sig_ptr`` is the pointer to the signature, or NULL if no
|
||||
signature is provided.
|
||||
- All reserved fields must be zeroed.
|
||||
|
||||
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
|
||||
the file and persist it to a filesystem-specific location associated
|
||||
with the file, then mark the file as a verity file. This ioctl may
|
||||
take a long time to execute on large files, and it is interruptible by
|
||||
fatal signals.
|
||||
|
||||
FS_IOC_ENABLE_VERITY checks for write access to the inode. However,
|
||||
it must be executed on an O_RDONLY file descriptor and no processes
|
||||
can have the file open for writing. Attempts to open the file for
|
||||
writing while this ioctl is executing will fail with ETXTBSY. (This
|
||||
is necessary to guarantee that no writable file descriptors will exist
|
||||
after verity is enabled, and to guarantee that the file's contents are
|
||||
stable while the Merkle tree is being built over it.)
|
||||
|
||||
On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
|
||||
verity file. On failure (including the case of interruption by a
|
||||
fatal signal), no changes are made to the file.
|
||||
|
||||
FS_IOC_ENABLE_VERITY can fail with the following errors:
|
||||
|
||||
- ``EACCES``: the process does not have write access to the file
|
||||
- ``EBADMSG``: the signature is malformed
|
||||
- ``EBUSY``: this ioctl is already running on the file
|
||||
- ``EEXIST``: the file already has verity enabled
|
||||
- ``EFAULT``: the caller provided inaccessible memory
|
||||
- ``EINTR``: the operation was interrupted by a fatal signal
|
||||
- ``EINVAL``: unsupported version, hash algorithm, or block size; or
|
||||
reserved bits are set; or the file descriptor refers to neither a
|
||||
regular file nor a directory.
|
||||
- ``EISDIR``: the file descriptor refers to a directory
|
||||
- ``EKEYREJECTED``: the signature doesn't match the file
|
||||
- ``EMSGSIZE``: the salt or signature is too long
|
||||
- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
|
||||
needed to verify the signature
|
||||
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
|
||||
available in the kernel's crypto API as currently configured (e.g.
|
||||
for SHA-512, missing CONFIG_CRYPTO_SHA512).
|
||||
- ``ENOTTY``: this type of filesystem does not implement fs-verity
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
|
||||
support; or the filesystem superblock has not had the 'verity'
|
||||
feature enabled on it; or the filesystem does not support fs-verity
|
||||
on this file. (See `Filesystem support`_.)
|
||||
- ``EPERM``: the file is append-only; or, a signature is required and
|
||||
one was not provided.
|
||||
- ``EROFS``: the filesystem is read-only
|
||||
- ``ETXTBSY``: someone has the file open for writing. This can be the
|
||||
caller's file descriptor, another open file descriptor, or the file
|
||||
reference held by a writable memory map.
|
||||
|
||||
FS_IOC_MEASURE_VERITY
|
||||
---------------------
|
||||
|
||||
The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
|
||||
file. The file measurement is a digest that cryptographically
|
||||
identifies the file contents that are being enforced on reads.
|
||||
|
||||
This ioctl takes in a pointer to a variable-length structure::
|
||||
|
||||
struct fsverity_digest {
|
||||
__u16 digest_algorithm;
|
||||
__u16 digest_size; /* input/output */
|
||||
__u8 digest[];
|
||||
};
|
||||
|
||||
``digest_size`` is an input/output field. On input, it must be
|
||||
initialized to the number of bytes allocated for the variable-length
|
||||
``digest`` field.
|
||||
|
||||
On success, 0 is returned and the kernel fills in the structure as
|
||||
follows:
|
||||
|
||||
- ``digest_algorithm`` will be the hash algorithm used for the file
|
||||
measurement. It will match ``fsverity_enable_arg::hash_algorithm``.
|
||||
- ``digest_size`` will be the size of the digest in bytes, e.g. 32
|
||||
for SHA-256. (This can be redundant with ``digest_algorithm``.)
|
||||
- ``digest`` will be the actual bytes of the digest.
|
||||
|
||||
FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
|
||||
regardless of the size of the file.
|
||||
|
||||
FS_IOC_MEASURE_VERITY can fail with the following errors:
|
||||
|
||||
- ``EFAULT``: the caller provided inaccessible memory
|
||||
- ``ENODATA``: the file is not a verity file
|
||||
- ``ENOTTY``: this type of filesystem does not implement fs-verity
|
||||
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
|
||||
support, or the filesystem superblock has not had the 'verity'
|
||||
feature enabled on it. (See `Filesystem support`_.)
|
||||
- ``EOVERFLOW``: the digest is longer than the specified
|
||||
``digest_size`` bytes. Try providing a larger buffer.
|
||||
|
||||
FS_IOC_GETFLAGS
|
||||
---------------
|
||||
|
||||
The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
|
||||
can also be used to check whether a file has fs-verity enabled or not.
|
||||
To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
|
||||
|
||||
The verity flag is not settable via FS_IOC_SETFLAGS. You must use
|
||||
FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
|
||||
|
||||
Accessing verity files
|
||||
======================
|
||||
|
||||
Applications can transparently access a verity file just like a
|
||||
non-verity one, with the following exceptions:
|
||||
|
||||
- Verity files are readonly. They cannot be opened for writing or
|
||||
truncate()d, even if the file mode bits allow it. Attempts to do
|
||||
one of these things will fail with EPERM. However, changes to
|
||||
metadata such as owner, mode, timestamps, and xattrs are still
|
||||
allowed, since these are not measured by fs-verity. Verity files
|
||||
can also still be renamed, deleted, and linked to.
|
||||
|
||||
- Direct I/O is not supported on verity files. Attempts to use direct
|
||||
I/O on such files will fall back to buffered I/O.
|
||||
|
||||
- DAX (Direct Access) is not supported on verity files, because this
|
||||
would circumvent the data verification.
|
||||
|
||||
- Reads of data that doesn't match the verity Merkle tree will fail
|
||||
with EIO (for read()) or SIGBUS (for mmap() reads).
|
||||
|
||||
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
|
||||
file's verity measurement is not signed by a key in the fs-verity
|
||||
keyring, then opening the file will fail. See `Built-in signature
|
||||
verification`_.
|
||||
|
||||
Direct access to the Merkle tree is not supported. Therefore, if a
|
||||
verity file is copied, or is backed up and restored, then it will lose
|
||||
its "verity"-ness. fs-verity is primarily meant for files like
|
||||
executables that are managed by a package manager.
|
||||
|
||||
File measurement computation
|
||||
============================
|
||||
|
||||
This section describes how fs-verity hashes the file contents using a
|
||||
Merkle tree to produce the "file measurement" which cryptographically
|
||||
identifies the file contents. This algorithm is the same for all
|
||||
filesystems that support fs-verity.
|
||||
|
||||
Userspace only needs to be aware of this algorithm if it needs to
|
||||
compute the file measurement itself, e.g. in order to sign the file.
|
||||
|
||||
.. _fsverity_merkle_tree:
|
||||
|
||||
Merkle tree
|
||||
-----------
|
||||
|
||||
The file contents is divided into blocks, where the block size is
|
||||
configurable but is usually 4096 bytes. The end of the last block is
|
||||
zero-padded if needed. Each block is then hashed, producing the first
|
||||
level of hashes. Then, the hashes in this first level are grouped
|
||||
into 'blocksize'-byte blocks (zero-padding the ends as needed) and
|
||||
these blocks are hashed, producing the second level of hashes. This
|
||||
proceeds up the tree until only a single block remains. The hash of
|
||||
this block is the "Merkle tree root hash".
|
||||
|
||||
If the file fits in one block and is nonempty, then the "Merkle tree
|
||||
root hash" is simply the hash of the single data block. If the file
|
||||
is empty, then the "Merkle tree root hash" is all zeroes.
|
||||
|
||||
The "blocks" here are not necessarily the same as "filesystem blocks".
|
||||
|
||||
If a salt was specified, then it's zero-padded to the closest multiple
|
||||
of the input size of the hash algorithm's compression function, e.g.
|
||||
64 bytes for SHA-256 or 128 bytes for SHA-512. The padded salt is
|
||||
prepended to every data or Merkle tree block that is hashed.
|
||||
|
||||
The purpose of the block padding is to cause every hash to be taken
|
||||
over the same amount of data, which simplifies the implementation and
|
||||
keeps open more possibilities for hardware acceleration. The purpose
|
||||
of the salt padding is to make the salting "free" when the salted hash
|
||||
state is precomputed, then imported for each hash.
|
||||
|
||||
Example: in the recommended configuration of SHA-256 and 4K blocks,
|
||||
128 hash values fit in each block. Thus, each level of the Merkle
|
||||
tree is approximately 128 times smaller than the previous, and for
|
||||
large files the Merkle tree's size converges to approximately 1/127 of
|
||||
the original file size. However, for small files, the padding is
|
||||
significant, making the space overhead proportionally more.
|
||||
|
||||
.. _fsverity_descriptor:
|
||||
|
||||
fs-verity descriptor
|
||||
--------------------
|
||||
|
||||
By itself, the Merkle tree root hash is ambiguous. For example, it
|
||||
can't a distinguish a large file from a small second file whose data
|
||||
is exactly the top-level hash block of the first file. Ambiguities
|
||||
also arise from the convention of padding to the next block boundary.
|
||||
|
||||
To solve this problem, the verity file measurement is actually
|
||||
computed as a hash of the following structure, which contains the
|
||||
Merkle tree root hash as well as other fields such as the file size::
|
||||
|
||||
struct fsverity_descriptor {
|
||||
__u8 version; /* must be 1 */
|
||||
__u8 hash_algorithm; /* Merkle tree hash algorithm */
|
||||
__u8 log_blocksize; /* log2 of size of data and tree blocks */
|
||||
__u8 salt_size; /* size of salt in bytes; 0 if none */
|
||||
__le32 sig_size; /* must be 0 */
|
||||
__le64 data_size; /* size of file the Merkle tree is built over */
|
||||
__u8 root_hash[64]; /* Merkle tree root hash */
|
||||
__u8 salt[32]; /* salt prepended to each hashed block */
|
||||
__u8 __reserved[144]; /* must be 0's */
|
||||
};
|
||||
|
||||
Note that the ``sig_size`` field must be set to 0 for the purpose of
|
||||
computing the file measurement, even if a signature was provided (or
|
||||
will be provided) to `FS_IOC_ENABLE_VERITY`_.
|
||||
|
||||
Built-in signature verification
|
||||
===============================
|
||||
|
||||
With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
|
||||
a portion of an authentication policy (see `Use cases`_) in the
|
||||
kernel. Specifically, it adds support for:
|
||||
|
||||
1. At fs-verity module initialization time, a keyring ".fs-verity" is
|
||||
created. The root user can add trusted X.509 certificates to this
|
||||
keyring using the add_key() system call, then (when done)
|
||||
optionally use keyctl_restrict_keyring() to prevent additional
|
||||
certificates from being added.
|
||||
|
||||
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
|
||||
detached signature in DER format of the file measurement. On
|
||||
success, this signature is persisted alongside the Merkle tree.
|
||||
Then, any time the file is opened, the kernel will verify the
|
||||
file's actual measurement against this signature, using the
|
||||
certificates in the ".fs-verity" keyring.
|
||||
|
||||
3. A new sysctl "fs.verity.require_signatures" is made available.
|
||||
When set to 1, the kernel requires that all verity files have a
|
||||
correctly signed file measurement as described in (2).
|
||||
|
||||
File measurements must be signed in the following format, which is
|
||||
similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
|
||||
|
||||
struct fsverity_signed_digest {
|
||||
char magic[8]; /* must be "FSVerity" */
|
||||
__le16 digest_algorithm;
|
||||
__le16 digest_size;
|
||||
__u8 digest[];
|
||||
};
|
||||
|
||||
fs-verity's built-in signature verification support is meant as a
|
||||
relatively simple mechanism that can be used to provide some level of
|
||||
authenticity protection for verity files, as an alternative to doing
|
||||
the signature verification in userspace or using IMA-appraisal.
|
||||
However, with this mechanism, userspace programs still need to check
|
||||
that the verity bit is set, and there is no protection against verity
|
||||
files being swapped around.
|
||||
|
||||
Filesystem support
|
||||
==================
|
||||
|
||||
fs-verity is currently supported by the ext4 and f2fs filesystems.
|
||||
The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
|
||||
on either filesystem.
|
||||
|
||||
``include/linux/fsverity.h`` declares the interface between the
|
||||
``fs/verity/`` support layer and filesystems. Briefly, filesystems
|
||||
must provide an ``fsverity_operations`` structure that provides
|
||||
methods to read and write the verity metadata to a filesystem-specific
|
||||
location, including the Merkle tree blocks and
|
||||
``fsverity_descriptor``. Filesystems must also call functions in
|
||||
``fs/verity/`` at certain times, such as when a file is opened or when
|
||||
pages have been read into the pagecache. (See `Verifying data`_.)
|
||||
|
||||
ext4
|
||||
----
|
||||
|
||||
ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2.
|
||||
|
||||
To create verity files on an ext4 filesystem, the filesystem must have
|
||||
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
|
||||
it. "verity" is an RO_COMPAT filesystem feature, so once set, old
|
||||
kernels will only be able to mount the filesystem readonly, and old
|
||||
versions of e2fsck will be unable to check the filesystem. Moreover,
|
||||
currently ext4 only supports mounting a filesystem with the "verity"
|
||||
feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
|
||||
|
||||
ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files. It
|
||||
can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
|
||||
|
||||
ext4 also supports encryption, which can be used simultaneously with
|
||||
fs-verity. In this case, the plaintext data is verified rather than
|
||||
the ciphertext. This is necessary in order to make the file
|
||||
measurement meaningful, since every file is encrypted differently.
|
||||
|
||||
ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
|
||||
past the end of the file, starting at the first 64K boundary beyond
|
||||
i_size. This approach works because (a) verity files are readonly,
|
||||
and (b) pages fully beyond i_size aren't visible to userspace but can
|
||||
be read/written internally by ext4 with only some relatively small
|
||||
changes to ext4. This approach avoids having to depend on the
|
||||
EA_INODE feature and on rearchitecturing ext4's xattr support to
|
||||
support paging multi-gigabyte xattrs into memory, and to support
|
||||
encrypting xattrs. Note that the verity metadata *must* be encrypted
|
||||
when the file is, since it contains hashes of the plaintext data.
|
||||
|
||||
Currently, ext4 verity only supports the case where the Merkle tree
|
||||
block size, filesystem block size, and page size are all the same. It
|
||||
also only supports extent-based files.
|
||||
|
||||
f2fs
|
||||
----
|
||||
|
||||
f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0.
|
||||
|
||||
To create verity files on an f2fs filesystem, the filesystem must have
|
||||
been formatted with ``-O verity``.
|
||||
|
||||
f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
|
||||
It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
|
||||
cleared.
|
||||
|
||||
Like ext4, f2fs stores the verity metadata (Merkle tree and
|
||||
fsverity_descriptor) past the end of the file, starting at the first
|
||||
64K boundary beyond i_size. See explanation for ext4 above.
|
||||
Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
|
||||
which wouldn't be enough for even a single Merkle tree block.
|
||||
|
||||
Currently, f2fs verity only supports a Merkle tree block size of 4096.
|
||||
Also, f2fs doesn't support enabling verity on files that currently
|
||||
have atomic or volatile writes pending.
|
||||
|
||||
Implementation details
|
||||
======================
|
||||
|
||||
Verifying data
|
||||
--------------
|
||||
|
||||
fs-verity ensures that all reads of a verity file's data are verified,
|
||||
regardless of which syscall is used to do the read (e.g. mmap(),
|
||||
read(), pread()) and regardless of whether it's the first read or a
|
||||
later read (unless the later read can return cached data that was
|
||||
already verified). Below, we describe how filesystems implement this.
|
||||
|
||||
Pagecache
|
||||
~~~~~~~~~
|
||||
|
||||
For filesystems using Linux's pagecache, the ``->readpage()`` and
|
||||
``->readpages()`` methods must be modified to verify pages before they
|
||||
are marked Uptodate. Merely hooking ``->read_iter()`` would be
|
||||
insufficient, since ``->read_iter()`` is not used for memory maps.
|
||||
|
||||
Therefore, fs/verity/ provides a function fsverity_verify_page() which
|
||||
verifies a page that has been read into the pagecache of a verity
|
||||
inode, but is still locked and not Uptodate, so it's not yet readable
|
||||
by userspace. As needed to do the verification,
|
||||
fsverity_verify_page() will call back into the filesystem to read
|
||||
Merkle tree pages via fsverity_operations::read_merkle_tree_page().
|
||||
|
||||
fsverity_verify_page() returns false if verification failed; in this
|
||||
case, the filesystem must not set the page Uptodate. Following this,
|
||||
as per the usual Linux pagecache behavior, attempts by userspace to
|
||||
read() from the part of the file containing the page will fail with
|
||||
EIO, and accesses to the page within a memory map will raise SIGBUS.
|
||||
|
||||
fsverity_verify_page() currently only supports the case where the
|
||||
Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
|
||||
|
||||
In principle, fsverity_verify_page() verifies the entire path in the
|
||||
Merkle tree from the data page to the root hash. However, for
|
||||
efficiency the filesystem may cache the hash pages. Therefore,
|
||||
fsverity_verify_page() only ascends the tree reading hash pages until
|
||||
an already-verified hash page is seen, as indicated by the PageChecked
|
||||
bit being set. It then verifies the path to that page.
|
||||
|
||||
This optimization, which is also used by dm-verity, results in
|
||||
excellent sequential read performance. This is because usually (e.g.
|
||||
127 in 128 times for 4K blocks and SHA-256) the hash page from the
|
||||
bottom level of the tree will already be cached and checked from
|
||||
reading a previous data page. However, random reads perform worse.
|
||||
|
||||
Block device based filesystems
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
|
||||
the pagecache, so the above subsection applies too. However, they
|
||||
also usually read many pages from a file at once, grouped into a
|
||||
structure called a "bio". To make it easier for these types of
|
||||
filesystems to support fs-verity, fs/verity/ also provides a function
|
||||
fsverity_verify_bio() which verifies all pages in a bio.
|
||||
|
||||
ext4 and f2fs also support encryption. If a verity file is also
|
||||
encrypted, the pages must be decrypted before being verified. To
|
||||
support this, these filesystems allocate a "post-read context" for
|
||||
each bio and store it in ``->bi_private``::
|
||||
|
||||
struct bio_post_read_ctx {
|
||||
struct bio *bio;
|
||||
struct work_struct work;
|
||||
unsigned int cur_step;
|
||||
unsigned int enabled_steps;
|
||||
};
|
||||
|
||||
``enabled_steps`` is a bitmask that specifies whether decryption,
|
||||
verity, or both is enabled. After the bio completes, for each needed
|
||||
postprocessing step the filesystem enqueues the bio_post_read_ctx on a
|
||||
workqueue, and then the workqueue work does the decryption or
|
||||
verification. Finally, pages where no decryption or verity error
|
||||
occurred are marked Uptodate, and the pages are unlocked.
|
||||
|
||||
Files on ext4 and f2fs may contain holes. Normally, ``->readpages()``
|
||||
simply zeroes holes and sets the corresponding pages Uptodate; no bios
|
||||
are issued. To prevent this case from bypassing fs-verity, these
|
||||
filesystems use fsverity_verify_page() to verify hole pages.
|
||||
|
||||
ext4 and f2fs disable direct I/O on verity files, since otherwise
|
||||
direct I/O would bypass fs-verity. (They also do the same for
|
||||
encrypted files.)
|
||||
|
||||
Userspace utility
|
||||
=================
|
||||
|
||||
This document focuses on the kernel, but a userspace utility for
|
||||
fs-verity can be found at:
|
||||
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
|
||||
|
||||
See the README.md file in the fsverity-utils source tree for details,
|
||||
including examples of setting up fs-verity protected files.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
To test fs-verity, use xfstests. For example, using `kvm-xfstests
|
||||
<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
|
||||
|
||||
kvm-xfstests -c ext4,f2fs -g verity
|
||||
|
||||
FAQ
|
||||
===
|
||||
|
||||
This section answers frequently asked questions about fs-verity that
|
||||
weren't already directly answered in other parts of this document.
|
||||
|
||||
:Q: Why isn't fs-verity part of IMA?
|
||||
:A: fs-verity and IMA (Integrity Measurement Architecture) have
|
||||
different focuses. fs-verity is a filesystem-level mechanism for
|
||||
hashing individual files using a Merkle tree. In contrast, IMA
|
||||
specifies a system-wide policy that specifies which files are
|
||||
hashed and what to do with those hashes, such as log them,
|
||||
authenticate them, or add them to a measurement list.
|
||||
|
||||
IMA is planned to support the fs-verity hashing mechanism as an
|
||||
alternative to doing full file hashes, for people who want the
|
||||
performance and security benefits of the Merkle tree based hash.
|
||||
But it doesn't make sense to force all uses of fs-verity to be
|
||||
through IMA. As a standalone filesystem feature, fs-verity
|
||||
already meets many users' needs, and it's testable like other
|
||||
filesystem features e.g. with xfstests.
|
||||
|
||||
:Q: Isn't fs-verity useless because the attacker can just modify the
|
||||
hashes in the Merkle tree, which is stored on-disk?
|
||||
:A: To verify the authenticity of an fs-verity file you must verify
|
||||
the authenticity of the "file measurement", which is basically the
|
||||
root hash of the Merkle tree. See `Use cases`_.
|
||||
|
||||
:Q: Isn't fs-verity useless because the attacker can just replace a
|
||||
verity file with a non-verity one?
|
||||
:A: See `Use cases`_. In the initial use case, it's really trusted
|
||||
userspace code that authenticates the files; fs-verity is just a
|
||||
tool to do this job efficiently and securely. The trusted
|
||||
userspace code will consider non-verity files to be inauthentic.
|
||||
|
||||
:Q: Why does the Merkle tree need to be stored on-disk? Couldn't you
|
||||
store just the root hash?
|
||||
:A: If the Merkle tree wasn't stored on-disk, then you'd have to
|
||||
compute the entire tree when the file is first accessed, even if
|
||||
just one byte is being read. This is a fundamental consequence of
|
||||
how Merkle tree hashing works. To verify a leaf node, you need to
|
||||
verify the whole path to the root hash, including the root node
|
||||
(the thing which the root hash is a hash of). But if the root
|
||||
node isn't stored on-disk, you have to compute it by hashing its
|
||||
children, and so on until you've actually hashed the entire file.
|
||||
|
||||
That defeats most of the point of doing a Merkle tree-based hash,
|
||||
since if you have to hash the whole file ahead of time anyway,
|
||||
then you could simply do sha256(file) instead. That would be much
|
||||
simpler, and a bit faster too.
|
||||
|
||||
It's true that an in-memory Merkle tree could still provide the
|
||||
advantage of verification on every read rather than just on the
|
||||
first read. However, it would be inefficient because every time a
|
||||
hash page gets evicted (you can't pin the entire Merkle tree into
|
||||
memory, since it may be very large), in order to restore it you
|
||||
again need to hash everything below it in the tree. This again
|
||||
defeats most of the point of doing a Merkle tree-based hash, since
|
||||
a single block read could trigger re-hashing gigabytes of data.
|
||||
|
||||
:Q: But couldn't you store just the leaf nodes and compute the rest?
|
||||
:A: See previous answer; this really just moves up one level, since
|
||||
one could alternatively interpret the data blocks as being the
|
||||
leaf nodes of the Merkle tree. It's true that the tree can be
|
||||
computed much faster if the leaf level is stored rather than just
|
||||
the data, but that's only because each level is less than 1% the
|
||||
size of the level below (assuming the recommended settings of
|
||||
SHA-256 and 4K blocks). For the exact same reason, by storing
|
||||
"just the leaf nodes" you'd already be storing over 99% of the
|
||||
tree, so you might as well simply store the whole tree.
|
||||
|
||||
:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
|
||||
part of a package that is installed to many computers?
|
||||
:A: This isn't currently supported. It was part of the original
|
||||
design, but was removed to simplify the kernel UAPI and because it
|
||||
wasn't a critical use case. Files are usually installed once and
|
||||
used many times, and cryptographic hashing is somewhat fast on
|
||||
most modern processors.
|
||||
|
||||
:Q: Why doesn't fs-verity support writes?
|
||||
:A: Write support would be very difficult and would require a
|
||||
completely different design, so it's well outside the scope of
|
||||
fs-verity. Write support would require:
|
||||
|
||||
- A way to maintain consistency between the data and hashes,
|
||||
including all levels of hashes, since corruption after a crash
|
||||
(especially of potentially the entire file!) is unacceptable.
|
||||
The main options for solving this are data journalling,
|
||||
copy-on-write, and log-structured volume. But it's very hard to
|
||||
retrofit existing filesystems with new consistency mechanisms.
|
||||
Data journalling is available on ext4, but is very slow.
|
||||
|
||||
- Rebuilding the the Merkle tree after every write, which would be
|
||||
extremely inefficient. Alternatively, a different authenticated
|
||||
dictionary structure such as an "authenticated skiplist" could
|
||||
be used. However, this would be far more complex.
|
||||
|
||||
Compare it to dm-verity vs. dm-integrity. dm-verity is very
|
||||
simple: the kernel just verifies read-only data against a
|
||||
read-only Merkle tree. In contrast, dm-integrity supports writes
|
||||
but is slow, is much more complex, and doesn't actually support
|
||||
full-device authentication since it authenticates each sector
|
||||
independently, i.e. there is no "root hash". It doesn't really
|
||||
make sense for the same device-mapper target to support these two
|
||||
very different cases; the same applies to fs-verity.
|
||||
|
||||
:Q: Since verity files are immutable, why isn't the immutable bit set?
|
||||
:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
|
||||
specific set of semantics which not only make the file contents
|
||||
read-only, but also prevent the file from being deleted, renamed,
|
||||
linked to, or having its owner or mode changed. These extra
|
||||
properties are unwanted for fs-verity, so reusing the immutable
|
||||
bit isn't appropriate.
|
||||
|
||||
:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
|
||||
:A: Abusing the xattr interface for basically arbitrary syscalls is
|
||||
heavily frowned upon by most of the Linux filesystem developers.
|
||||
An xattr should really just be an xattr on-disk, not an API to
|
||||
e.g. magically trigger construction of a Merkle tree.
|
||||
|
||||
:Q: Does fs-verity support remote filesystems?
|
||||
:A: Only ext4 and f2fs support is implemented currently, but in
|
||||
principle any filesystem that can store per-file verity metadata
|
||||
can support fs-verity, regardless of whether it's local or remote.
|
||||
Some filesystems may have fewer options of where to store the
|
||||
verity metadata; one possibility is to store it past the end of
|
||||
the file and "hide" it from userspace by manipulating i_size. The
|
||||
data verification functions provided by ``fs/verity/`` also assume
|
||||
that the filesystem uses the Linux pagecache, but both local and
|
||||
remote filesystems normally do so.
|
||||
|
||||
:Q: Why is anything filesystem-specific at all? Shouldn't fs-verity
|
||||
be implemented entirely at the VFS level?
|
||||
:A: There are many reasons why this is not possible or would be very
|
||||
difficult, including the following:
|
||||
|
||||
- To prevent bypassing verification, pages must not be marked
|
||||
Uptodate until they've been verified. Currently, each
|
||||
filesystem is responsible for marking pages Uptodate via
|
||||
``->readpages()``. Therefore, currently it's not possible for
|
||||
the VFS to do the verification on its own. Changing this would
|
||||
require significant changes to the VFS and all filesystems.
|
||||
|
||||
- It would require defining a filesystem-independent way to store
|
||||
the verity metadata. Extended attributes don't work for this
|
||||
because (a) the Merkle tree may be gigabytes, but many
|
||||
filesystems assume that all xattrs fit into a single 4K
|
||||
filesystem block, and (b) ext4 and f2fs encryption doesn't
|
||||
encrypt xattrs, yet the Merkle tree *must* be encrypted when the
|
||||
file contents are, because it stores hashes of the plaintext
|
||||
file contents.
|
||||
|
||||
So the verity metadata would have to be stored in an actual
|
||||
file. Using a separate file would be very ugly, since the
|
||||
metadata is fundamentally part of the file to be protected, and
|
||||
it could cause problems where users could delete the real file
|
||||
but not the metadata file or vice versa. On the other hand,
|
||||
having it be in the same file would break applications unless
|
||||
filesystems' notion of i_size were divorced from the VFS's,
|
||||
which would be complex and require changes to all filesystems.
|
||||
|
||||
- It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
|
||||
transaction mechanism so that either the file ends up with
|
||||
verity enabled, or no changes were made. Allowing intermediate
|
||||
states to occur after a crash may cause problems.
|
||||
@@ -315,3 +315,15 @@ exported for use by modules.
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: fs/pipe.c
|
||||
|
||||
Encryption API
|
||||
==============
|
||||
|
||||
A library which filesystems can hook into to support transparent
|
||||
encryption of files and directories.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
fscrypt
|
||||
fsverity
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
Copyright (C) 1999, 2000 Bruce Tenison
|
||||
Portions Copyright (C) 1999, 2000 David Nelson
|
||||
Thanks to David Nelson for guidance and the usage of the scanner.txt
|
||||
and scanner.c files to model our driver and this informative file.
|
||||
|
||||
Mar. 2, 2000
|
||||
|
||||
CHANGES
|
||||
|
||||
- Initial Revision
|
||||
|
||||
|
||||
OVERVIEW
|
||||
|
||||
This README will address issues regarding how to configure the kernel
|
||||
to access a RIO 500 mp3 player.
|
||||
Before I explain how to use this to access the Rio500 please be warned:
|
||||
|
||||
W A R N I N G:
|
||||
--------------
|
||||
|
||||
Please note that this software is still under development. The authors
|
||||
are in no way responsible for any damage that may occur, no matter how
|
||||
inconsequential.
|
||||
|
||||
It seems that the Rio has a problem when sending .mp3 with low batteries.
|
||||
I suggest when the batteries are low and you want to transfer stuff that you
|
||||
replace it with a fresh one. In my case, what happened is I lost two 16kb
|
||||
blocks (they are no longer usable to store information to it). But I don't
|
||||
know if that's normal or not; it could simply be a problem with the flash
|
||||
memory.
|
||||
|
||||
In an extreme case, I left my Rio playing overnight and the batteries wore
|
||||
down to nothing and appear to have corrupted the flash memory. My RIO
|
||||
needed to be replaced as a result. Diamond tech support is aware of the
|
||||
problem. Do NOT allow your batteries to wear down to nothing before
|
||||
changing them. It appears RIO 500 firmware does not handle low battery
|
||||
power well at all.
|
||||
|
||||
On systems with OHCI controllers, the kernel OHCI code appears to have
|
||||
power on problems with some chipsets. If you are having problems
|
||||
connecting to your RIO 500, try turning it on first and then plugging it
|
||||
into the USB cable.
|
||||
|
||||
Contact information:
|
||||
--------------------
|
||||
|
||||
The main page for the project is hosted at sourceforge.net in the following
|
||||
URL: <http://rio500.sourceforge.net>. You can also go to the project's
|
||||
sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
|
||||
There is also a mailing list: rio500-users@lists.sourceforge.net
|
||||
|
||||
Authors:
|
||||
-------
|
||||
|
||||
Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
|
||||
Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
|
||||
things work there. Bruce Tenison <btenison@dibbs.net> is adding support
|
||||
for .fon files and also does testing. The program will mostly sure be
|
||||
re-written and Pete Ikusz along with the rest will re-design it. I would
|
||||
also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
|
||||
with some important information regarding the communication with the Rio.
|
||||
|
||||
ADDITIONAL INFORMATION and Userspace tools
|
||||
|
||||
http://rio500.sourceforge.net/
|
||||
|
||||
|
||||
REQUIREMENTS
|
||||
|
||||
A host with a USB port. Ideally, either a UHCI (Intel) or OHCI
|
||||
(Compaq and others) hardware port should work.
|
||||
|
||||
A Linux development kernel (2.3.x) with USB support enabled or a
|
||||
backported version to linux-2.2.x. See http://www.linux-usb.org for
|
||||
more information on accomplishing this.
|
||||
|
||||
A Linux kernel with RIO 500 support enabled.
|
||||
|
||||
'lspci' which is only needed to determine the type of USB hardware
|
||||
available in your machine.
|
||||
|
||||
CONFIGURATION
|
||||
|
||||
Using `lspci -v`, determine the type of USB hardware available.
|
||||
|
||||
If you see something like:
|
||||
|
||||
USB Controller: ......
|
||||
Flags: .....
|
||||
I/O ports at ....
|
||||
|
||||
Then you have a UHCI based controller.
|
||||
|
||||
If you see something like:
|
||||
|
||||
USB Controller: .....
|
||||
Flags: ....
|
||||
Memory at .....
|
||||
|
||||
Then you have a OHCI based controller.
|
||||
|
||||
Using `make menuconfig` or your preferred method for configuring the
|
||||
kernel, select 'Support for USB', 'OHCI/UHCI' depending on your
|
||||
hardware (determined from the steps above), 'USB Diamond Rio500 support', and
|
||||
'Preliminary USB device filesystem'. Compile and install the modules
|
||||
(you may need to execute `depmod -a` to update the module
|
||||
dependencies).
|
||||
|
||||
Add a device for the USB rio500:
|
||||
`mknod /dev/usb/rio500 c 180 64`
|
||||
|
||||
Set appropriate permissions for /dev/usb/rio500 (don't forget about
|
||||
group and world permissions). Both read and write permissions are
|
||||
required for proper operation.
|
||||
|
||||
Load the appropriate modules (if compiled as modules):
|
||||
|
||||
OHCI:
|
||||
modprobe usbcore
|
||||
modprobe usb-ohci
|
||||
modprobe rio500
|
||||
|
||||
UHCI:
|
||||
modprobe usbcore
|
||||
modprobe usb-uhci (or uhci)
|
||||
modprobe rio500
|
||||
|
||||
That's it. The Rio500 Utils at: http://rio500.sourceforge.net should
|
||||
be able to access the rio500.
|
||||
|
||||
BUGS
|
||||
|
||||
If you encounter any problems feel free to drop me an email.
|
||||
|
||||
Bruce Tenison
|
||||
btenison@dibbs.net
|
||||
|
||||
21
MAINTAINERS
21
MAINTAINERS
@@ -5649,11 +5649,25 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt.git
|
||||
S: Supported
|
||||
F: fs/crypto/
|
||||
F: include/linux/fscrypt*.h
|
||||
F: include/uapi/linux/fscrypt.h
|
||||
F: Documentation/filesystems/fscrypt.rst
|
||||
|
||||
FUJITSU FR-V (FRV) PORT
|
||||
S: Orphan
|
||||
F: arch/frv/
|
||||
|
||||
FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
|
||||
M: Eric Biggers <ebiggers@kernel.org>
|
||||
M: Theodore Y. Ts'o <tytso@mit.edu>
|
||||
L: linux-fscrypt@vger.kernel.org
|
||||
Q: https://patchwork.kernel.org/project/linux-fscrypt/list/
|
||||
T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
|
||||
S: Supported
|
||||
F: fs/verity/
|
||||
F: include/linux/fsverity.h
|
||||
F: include/uapi/linux/fsverity.h
|
||||
F: Documentation/filesystems/fsverity.rst
|
||||
|
||||
FUJITSU LAPTOP EXTRAS
|
||||
M: Jonathan Woithe <jwoithe@just42.net>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
@@ -13886,13 +13900,6 @@ W: http://www.linux-usb.org/usbnet
|
||||
S: Maintained
|
||||
F: drivers/net/usb/dm9601.c
|
||||
|
||||
USB DIAMOND RIO500 DRIVER
|
||||
M: Cesar Miquel <miquel@df.uba.ar>
|
||||
L: rio500-users@lists.sourceforge.net
|
||||
W: http://rio500.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/usb/misc/rio500*
|
||||
|
||||
USB EHCI DRIVER
|
||||
M: Alan Stern <stern@rowland.harvard.edu>
|
||||
L: linux-usb@vger.kernel.org
|
||||
|
||||
16
Makefile
16
Makefile
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 14
|
||||
SUBLEVEL = 141
|
||||
SUBLEVEL = 150
|
||||
EXTRAVERSION =
|
||||
NAME = Petit Gorille
|
||||
|
||||
@@ -642,7 +642,9 @@ all: vmlinux
|
||||
|
||||
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
|
||||
KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
|
||||
CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
|
||||
CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \
|
||||
$(call cc-option,-fno-tree-loop-im) \
|
||||
$(call cc-disable-warning,maybe-uninitialized,)
|
||||
CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,)
|
||||
export CFLAGS_GCOV CFLAGS_KCOV
|
||||
|
||||
@@ -841,7 +843,7 @@ export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux
|
||||
endif
|
||||
|
||||
ifdef CONFIG_CFI_CLANG
|
||||
cfi-clang-flags += -fsanitize=cfi
|
||||
cfi-clang-flags += -fsanitize=cfi $(call cc-option, -fsplit-lto-unit)
|
||||
DISABLE_CFI_CLANG := -fno-sanitize=cfi
|
||||
ifdef CONFIG_MODULES
|
||||
cfi-clang-flags += -fsanitize-cfi-cross-dso
|
||||
@@ -940,6 +942,10 @@ ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
|
||||
LDFLAGS_vmlinux += $(call ld-option, -X,)
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_RELR),y)
|
||||
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
|
||||
endif
|
||||
|
||||
# Default kernel image to build when no specific target is given.
|
||||
# KBUILD_IMAGE may be overruled on the command line or
|
||||
# set in the environment
|
||||
@@ -1022,9 +1028,11 @@ mod_sign_cmd = true
|
||||
endif
|
||||
export mod_sign_cmd
|
||||
|
||||
HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
|
||||
|
||||
ifdef CONFIG_STACK_VALIDATION
|
||||
has_libelf := $(call try-run,\
|
||||
echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0)
|
||||
echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
|
||||
ifeq ($(has_libelf),1)
|
||||
objtool_target := tools/objtool FORCE
|
||||
else
|
||||
|
||||
14
arch/Kconfig
14
arch/Kconfig
@@ -1042,4 +1042,18 @@ config HAVE_ARCH_COMPILER_H
|
||||
linux/compiler-*.h in order to override macro definitions that those
|
||||
headers generally provide.
|
||||
|
||||
# Select if the architecture has support for applying RELR relocations.
|
||||
config ARCH_HAS_RELR
|
||||
bool
|
||||
|
||||
config RELR
|
||||
bool "Use RELR relocation packing"
|
||||
depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR
|
||||
default y
|
||||
help
|
||||
Store the kernel's dynamic relocations in the RELR relocation packing
|
||||
format. Requires a compatible linker (LLD supports this feature), as
|
||||
well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy
|
||||
are compatible).
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
*
|
||||
* Datamanual Revisions:
|
||||
*
|
||||
* AM572x Silicon Revision 2.0: SPRS953B, Revised November 2016
|
||||
* AM572x Silicon Revision 2.0: SPRS953F, Revised May 2019
|
||||
* AM572x Silicon Revision 1.1: SPRS915R, Revised November 2016
|
||||
*
|
||||
*/
|
||||
@@ -229,45 +229,45 @@
|
||||
|
||||
mmc3_pins_default: mmc3_pins_default {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_hs: mmc3_pins_hs {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_sdr12: mmc3_pins_sdr12 {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
mmc3_pins_sdr25: mmc3_pins_sdr25 {
|
||||
pinctrl-single,pins = <
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */
|
||||
DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */
|
||||
DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */
|
||||
DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */
|
||||
DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */
|
||||
DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */
|
||||
>;
|
||||
};
|
||||
|
||||
|
||||
@@ -426,6 +426,7 @@
|
||||
regulator-name = "vdd_ldo10";
|
||||
regulator-min-microvolt = <1800000>;
|
||||
regulator-max-microvolt = <1800000>;
|
||||
regulator-always-on;
|
||||
regulator-state-mem {
|
||||
regulator-off-in-suspend;
|
||||
};
|
||||
|
||||
@@ -426,6 +426,7 @@
|
||||
regulator-name = "vdd_ldo10";
|
||||
regulator-min-microvolt = <1800000>;
|
||||
regulator-max-microvolt = <1800000>;
|
||||
regulator-always-on;
|
||||
regulator-state-mem {
|
||||
regulator-off-in-suspend;
|
||||
};
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
<&clks IMX7D_ENET1_TIME_ROOT_CLK>;
|
||||
assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;
|
||||
assigned-clock-rates = <0>, <100000000>;
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <ðphy0>;
|
||||
fsl,magic-packet;
|
||||
status = "okay";
|
||||
@@ -69,7 +69,7 @@
|
||||
<&clks IMX7D_ENET2_TIME_ROOT_CLK>;
|
||||
assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>;
|
||||
assigned-clock-rates = <0>, <100000000>;
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
phy-handle = <ðphy1>;
|
||||
fsl,magic-packet;
|
||||
status = "okay";
|
||||
|
||||
@@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m
|
||||
CONFIG_USB_SERIAL_CYBERJACK=m
|
||||
CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_EXT2_FS=m
|
||||
CONFIG_EXT3_FS=m
|
||||
CONFIG_MSDOS_FS=y
|
||||
|
||||
@@ -197,7 +197,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -588,7 +588,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -334,7 +334,6 @@ CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_ADUTUX=m
|
||||
CONFIG_USB_SEVSEG=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYPRESS_CY7C63=m
|
||||
|
||||
@@ -191,7 +191,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Google, Inc.
|
||||
*
|
||||
* This software is licensed under the terms of the GNU General Public
|
||||
* License version 2, as published by the Free Software Foundation, and
|
||||
* may be copied, distributed, and modified under those terms.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __ASM_FIQ_GLUE_H
|
||||
#define __ASM_FIQ_GLUE_H
|
||||
|
||||
struct fiq_glue_handler {
|
||||
void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp);
|
||||
void (*resume)(struct fiq_glue_handler *h);
|
||||
};
|
||||
typedef void (*fiq_return_handler_t)(void);
|
||||
|
||||
int fiq_glue_register_handler(struct fiq_glue_handler *handler);
|
||||
int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return);
|
||||
int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return);
|
||||
|
||||
#ifdef CONFIG_FIQ_GLUE
|
||||
void fiq_glue_resume(void);
|
||||
#else
|
||||
static inline void fiq_glue_resume(void) {}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -21,7 +21,6 @@ config KVM
|
||||
bool "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on MMU && OF
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select ARM_GIC
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
|
||||
@@ -131,6 +131,9 @@ static int __init omap4_sram_init(void)
|
||||
struct device_node *np;
|
||||
struct gen_pool *sram_pool;
|
||||
|
||||
if (!soc_is_omap44xx() && !soc_is_omap54xx())
|
||||
return 0;
|
||||
|
||||
np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu");
|
||||
if (!np)
|
||||
pr_warn("%s:Unable to allocate sram needed to handle errata I688\n",
|
||||
|
||||
@@ -389,7 +389,8 @@ static struct omap_hwmod dra7xx_dcan2_hwmod = {
|
||||
static struct omap_hwmod_class_sysconfig dra7xx_epwmss_sysc = {
|
||||
.rev_offs = 0x0,
|
||||
.sysc_offs = 0x4,
|
||||
.sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET,
|
||||
.sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
|
||||
SYSC_HAS_RESET_STATUS,
|
||||
.idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
|
||||
.sysc_fields = &omap_hwmod_sysc_type2,
|
||||
};
|
||||
|
||||
@@ -65,7 +65,7 @@ int zynq_cpun_start(u32 address, int cpu)
|
||||
* 0x4: Jump by mov instruction
|
||||
* 0x8: Jumping address
|
||||
*/
|
||||
memcpy((__force void *)zero, &zynq_secondary_trampoline,
|
||||
memcpy_toio(zero, &zynq_secondary_trampoline,
|
||||
trampoline_size);
|
||||
writel(address, zero + trampoline_size);
|
||||
|
||||
|
||||
@@ -195,6 +195,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
|
||||
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
|
||||
int pfn_valid(unsigned long pfn)
|
||||
{
|
||||
phys_addr_t addr = __pfn_to_phys(pfn);
|
||||
|
||||
if (__phys_to_pfn(addr) != pfn)
|
||||
return 0;
|
||||
|
||||
return memblock_is_map_memory(__pfn_to_phys(pfn));
|
||||
}
|
||||
EXPORT_SYMBOL(pfn_valid);
|
||||
@@ -722,7 +727,8 @@ static void update_sections_early(struct section_perm perms[], int n)
|
||||
if (t->flags & PF_KTHREAD)
|
||||
continue;
|
||||
for_each_thread(t, s)
|
||||
set_section_perms(perms, n, true, s->mm);
|
||||
if (s->mm)
|
||||
set_section_perms(perms, n, true, s->mm);
|
||||
}
|
||||
set_section_perms(perms, n, true, current->active_mm);
|
||||
set_section_perms(perms, n, true, &init_mm);
|
||||
|
||||
@@ -1175,6 +1175,22 @@ void __init adjust_lowmem_bounds(void)
|
||||
*/
|
||||
vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
|
||||
|
||||
/*
|
||||
* The first usable region must be PMD aligned. Mark its start
|
||||
* as MEMBLOCK_NOMAP if it isn't
|
||||
*/
|
||||
for_each_memblock(memory, reg) {
|
||||
if (!memblock_is_nomap(reg)) {
|
||||
if (!IS_ALIGNED(reg->base, PMD_SIZE)) {
|
||||
phys_addr_t len;
|
||||
|
||||
len = round_up(reg->base, PMD_SIZE) - reg->base;
|
||||
memblock_mark_nomap(reg->base, len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
phys_addr_t block_start = reg->base;
|
||||
phys_addr_t block_end = reg->base + reg->size;
|
||||
|
||||
@@ -67,6 +67,7 @@ void samsung_wdt_reset(void)
|
||||
#ifdef CONFIG_OF
|
||||
static const struct of_device_id s3c2410_wdt_match[] = {
|
||||
{ .compatible = "samsung,s3c2410-wdt" },
|
||||
{ .compatible = "samsung,s3c6410-wdt" },
|
||||
{},
|
||||
};
|
||||
|
||||
|
||||
@@ -412,3 +412,5 @@
|
||||
395 common pkey_alloc sys_pkey_alloc
|
||||
396 common pkey_free sys_pkey_free
|
||||
397 common statx sys_statx
|
||||
424 common pidfd_send_signal sys_pidfd_send_signal
|
||||
434 common pidfd_open sys_pidfd_open
|
||||
|
||||
@@ -70,7 +70,8 @@ config ARM64
|
||||
select HAVE_ARCH_BITREVERSE
|
||||
select HAVE_ARCH_HUGE_VMAP
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
|
||||
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
|
||||
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_MMAP_RND_BITS
|
||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
|
||||
@@ -861,6 +862,15 @@ config ARM64_SSBD
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config ARM64_TAGGED_ADDR_ABI
|
||||
bool "Enable the tagged user addresses syscall ABI"
|
||||
default y
|
||||
help
|
||||
When this option is enabled, user applications can opt in to a
|
||||
relaxed ABI via prctl() allowing tagged addresses to be passed
|
||||
to system calls as pointer arguments. For details, see
|
||||
Documentation/arm64/tagged-address-abi.rst.
|
||||
|
||||
menuconfig ARMV8_DEPRECATED
|
||||
bool "Emulate deprecated/obsolete ARMv8 instructions"
|
||||
depends on COMPAT
|
||||
@@ -1043,6 +1053,7 @@ config ARM64_MODULE_PLTS
|
||||
|
||||
config RELOCATABLE
|
||||
bool
|
||||
select ARCH_HAS_RELR
|
||||
help
|
||||
This builds the kernel as a Position Independent Executable (PIE),
|
||||
which retains all relocation metadata required to relocate the
|
||||
|
||||
@@ -128,12 +128,23 @@ ifeq ($(cc-name),clang)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, asm-operand-widths)
|
||||
endif
|
||||
|
||||
# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
|
||||
ifeq ($(CONFIG_KASAN_SW_TAGS), y)
|
||||
KASAN_SHADOW_SCALE_SHIFT := 4
|
||||
else
|
||||
KASAN_SHADOW_SCALE_SHIFT := 3
|
||||
endif
|
||||
|
||||
KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
|
||||
KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
|
||||
KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
|
||||
|
||||
# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
|
||||
# - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT))
|
||||
# in 32-bit arithmetic
|
||||
KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
|
||||
(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
|
||||
+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
|
||||
- (1 << (64 - 32 - 3)) )) )
|
||||
(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
|
||||
+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) \
|
||||
- (1 << (64 - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) )) )
|
||||
|
||||
export TEXT_OFFSET GZFLAGS
|
||||
|
||||
|
||||
@@ -77,6 +77,7 @@
|
||||
pinctrl-0 = <&usb30_host_drv>;
|
||||
regulator-name = "vcc_host_5v";
|
||||
regulator-always-on;
|
||||
regulator-boot-on;
|
||||
vin-supply = <&vcc_sys>;
|
||||
};
|
||||
|
||||
@@ -87,6 +88,7 @@
|
||||
pinctrl-0 = <&usb20_host_drv>;
|
||||
regulator-name = "vcc_host1_5v";
|
||||
regulator-always-on;
|
||||
regulator-boot-on;
|
||||
vin-supply = <&vcc_sys>;
|
||||
};
|
||||
|
||||
|
||||
@@ -685,6 +685,7 @@
|
||||
<&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
|
||||
clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
|
||||
fifo-depth = <0x100>;
|
||||
max-frequency = <150000000>;
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
@@ -696,6 +697,7 @@
|
||||
<&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
|
||||
clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
|
||||
fifo-depth = <0x100>;
|
||||
max-frequency = <150000000>;
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
@@ -707,6 +709,7 @@
|
||||
<&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
|
||||
clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
|
||||
fifo-depth = <0x100>;
|
||||
max-frequency = <150000000>;
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ CONFIG_SGETMASK_SYSCALL=y
|
||||
# CONFIG_SYSFS_SYSCALL is not set
|
||||
CONFIG_KALLSYMS_ALL=y
|
||||
CONFIG_BPF_SYSCALL=y
|
||||
CONFIG_BPF_JIT_ALWAYS_ON=y
|
||||
CONFIG_EMBEDDED=y
|
||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||
# CONFIG_COMPAT_BRK is not set
|
||||
@@ -58,8 +59,6 @@ CONFIG_SETEND_EMULATION=y
|
||||
CONFIG_ARM64_SW_TTBR0_PAN=y
|
||||
CONFIG_ARM64_LSE_ATOMICS=y
|
||||
CONFIG_RANDOMIZE_BASE=y
|
||||
CONFIG_CMDLINE="console=ttyAMA0"
|
||||
CONFIG_CMDLINE_EXTEND=y
|
||||
# CONFIG_EFI is not set
|
||||
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
|
||||
CONFIG_COMPAT=y
|
||||
@@ -82,6 +81,8 @@ CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
CONFIG_XFRM_USER=y
|
||||
CONFIG_XFRM_INTERFACE=y
|
||||
CONFIG_XFRM_STATISTICS=y
|
||||
CONFIG_NET_KEY=y
|
||||
CONFIG_INET=y
|
||||
CONFIG_IP_MULTICAST=y
|
||||
@@ -179,17 +180,25 @@ CONFIG_IP6_NF_RAW=y
|
||||
CONFIG_L2TP=y
|
||||
CONFIG_NET_SCHED=y
|
||||
CONFIG_NET_SCH_HTB=y
|
||||
CONFIG_NET_SCH_NETEM=y
|
||||
CONFIG_NET_SCH_INGRESS=y
|
||||
CONFIG_NET_CLS_U32=y
|
||||
CONFIG_NET_EMATCH=y
|
||||
CONFIG_NET_EMATCH_U32=y
|
||||
CONFIG_NET_CLS_ACT=y
|
||||
CONFIG_VSOCKETS=y
|
||||
CONFIG_VIRTIO_VSOCKETS=y
|
||||
CONFIG_BPF_JIT=y
|
||||
CONFIG_CAN=y
|
||||
# CONFIG_CAN_BCM is not set
|
||||
# CONFIG_CAN_GW is not set
|
||||
CONFIG_CAN_VCAN=y
|
||||
CONFIG_CFG80211=y
|
||||
# CONFIG_CFG80211_DEFAULT_PS is not set
|
||||
CONFIG_MAC80211=y
|
||||
# CONFIG_MAC80211_RC_MINSTREL is not set
|
||||
CONFIG_RFKILL=y
|
||||
# CONFIG_UEVENT_HELPER is not set
|
||||
CONFIG_DEVTMPFS=y
|
||||
# CONFIG_ALLOW_DEV_COREDUMP is not set
|
||||
CONFIG_DEBUG_DEVRES=y
|
||||
CONFIG_OF_UNITTEST=y
|
||||
@@ -206,6 +215,7 @@ CONFIG_SCSI_VIRTIO=y
|
||||
CONFIG_MD=y
|
||||
CONFIG_BLK_DEV_DM=y
|
||||
CONFIG_DM_CRYPT=y
|
||||
CONFIG_DM_SNAPSHOT=y
|
||||
CONFIG_DM_UEVENT=y
|
||||
CONFIG_DM_VERITY=y
|
||||
CONFIG_DM_VERITY_FEC=y
|
||||
@@ -276,6 +286,7 @@ CONFIG_SERIAL_8250_NR_UARTS=48
|
||||
CONFIG_SERIAL_8250_EXTENDED=y
|
||||
CONFIG_SERIAL_8250_MANY_PORTS=y
|
||||
CONFIG_SERIAL_8250_SHARE_IRQ=y
|
||||
CONFIG_SERIAL_OF_PLATFORM=y
|
||||
CONFIG_SERIAL_AMBA_PL011=y
|
||||
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
|
||||
CONFIG_VIRTIO_CONSOLE=y
|
||||
@@ -370,6 +381,7 @@ CONFIG_MMC=y
|
||||
CONFIG_RTC_CLASS=y
|
||||
# CONFIG_RTC_HCTOSYS is not set
|
||||
# CONFIG_RTC_SYSTOHC is not set
|
||||
CONFIG_RTC_DRV_PL030=y
|
||||
CONFIG_RTC_DRV_PL031=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
# CONFIG_VIRTIO_PCI_LEGACY is not set
|
||||
@@ -399,6 +411,8 @@ CONFIG_F2FS_FS_ENCRYPTION=y
|
||||
# CONFIG_DNOTIFY is not set
|
||||
CONFIG_QUOTA=y
|
||||
CONFIG_QFMT_V2=y
|
||||
CONFIG_FUSE_FS=y
|
||||
CONFIG_OVERLAY_FS=y
|
||||
CONFIG_MSDOS_FS=y
|
||||
CONFIG_VFAT_FS=y
|
||||
CONFIG_TMPFS=y
|
||||
@@ -428,7 +442,6 @@ CONFIG_LSM_MMAP_MIN_ADDR=65536
|
||||
CONFIG_HARDENED_USERCOPY=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
CONFIG_CRYPTO_ADIANTUM=y
|
||||
CONFIG_CRYPTO_SHA512=y
|
||||
CONFIG_CRYPTO_LZ4=y
|
||||
CONFIG_CRYPTO_ZSTD=y
|
||||
CONFIG_CRYPTO_ANSI_CPRNG=y
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
#ifndef __ASM_ASSEMBLER_H
|
||||
#define __ASM_ASSEMBLER_H
|
||||
|
||||
#include <asm-generic/export.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cputype.h>
|
||||
@@ -458,6 +460,13 @@ alternative_endif
|
||||
#else
|
||||
#define NOKPROBE(x)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
#define EXPORT_SYMBOL_NOKASAN(name)
|
||||
#else
|
||||
#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Emit a 64-bit absolute little endian symbol reference in a way that
|
||||
* ensures that it will be resolved at build time, even when building a
|
||||
|
||||
@@ -16,10 +16,12 @@
|
||||
* 0x400: for dynamic BRK instruction
|
||||
* 0x401: for compile time BRK instruction
|
||||
* 0x800: kernel-mode BUG() and WARN() traps
|
||||
* 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff)
|
||||
*/
|
||||
#define FAULT_BRK_IMM 0x100
|
||||
#define KGDB_DYN_DBG_BRK_IMM 0x400
|
||||
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
|
||||
#define BUG_BRK_IMM 0x800
|
||||
#define KASAN_BRK_IMM 0x900
|
||||
|
||||
#endif
|
||||
|
||||
@@ -46,6 +46,10 @@
|
||||
*/
|
||||
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bitops.h>
|
||||
|
||||
@@ -73,7 +73,7 @@ __XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
|
||||
#undef __XCHG_CASE
|
||||
|
||||
#define __XCHG_GEN(sfx) \
|
||||
static inline unsigned long __xchg##sfx(unsigned long x, \
|
||||
static __always_inline unsigned long __xchg##sfx(unsigned long x, \
|
||||
volatile void *ptr, \
|
||||
int size) \
|
||||
{ \
|
||||
@@ -115,7 +115,7 @@ __XCHG_GEN(_mb)
|
||||
#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
|
||||
|
||||
#define __CMPXCHG_GEN(sfx) \
|
||||
static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
|
||||
static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
|
||||
unsigned long old, \
|
||||
unsigned long new, \
|
||||
int size) \
|
||||
@@ -248,7 +248,7 @@ __CMPWAIT_CASE( , , 8);
|
||||
#undef __CMPWAIT_CASE
|
||||
|
||||
#define __CMPWAIT_GEN(sfx) \
|
||||
static inline void __cmpwait##sfx(volatile void *ptr, \
|
||||
static __always_inline void __cmpwait##sfx(volatile void *ptr, \
|
||||
unsigned long val, \
|
||||
int size) \
|
||||
{ \
|
||||
|
||||
@@ -4,15 +4,20 @@
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/pgtable-types.h>
|
||||
|
||||
#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
|
||||
#define arch_kasan_reset_tag(addr) __tag_reset(addr)
|
||||
#define arch_kasan_get_tag(addr) __tag_get(addr)
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
|
||||
/*
|
||||
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
|
||||
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
|
||||
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
|
||||
* where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
|
||||
*/
|
||||
#define KASAN_SHADOW_START (VA_START)
|
||||
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
|
||||
@@ -20,14 +25,16 @@
|
||||
/*
|
||||
* This value is used to map an address to the corresponding shadow
|
||||
* address by the following formula:
|
||||
* shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
|
||||
* shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
|
||||
*
|
||||
* (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
|
||||
* cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
|
||||
* should satisfy the following equation:
|
||||
* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
|
||||
* (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
|
||||
* [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
|
||||
* addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
|
||||
* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
|
||||
* (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
|
||||
*/
|
||||
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
|
||||
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
|
||||
(64 - KASAN_SHADOW_SCALE_SHIFT)))
|
||||
|
||||
void kasan_init(void);
|
||||
void kasan_copy_shadow(pgd_t *pgdir);
|
||||
|
||||
@@ -80,12 +80,12 @@
|
||||
#define KERNEL_END _end
|
||||
|
||||
/*
|
||||
* KASAN requires 1/8th of the kernel virtual address space for the shadow
|
||||
* region. KASAN can bloat the stack significantly, so double the (minimum)
|
||||
* stack size when KASAN is in use.
|
||||
* Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
|
||||
* address space for the shadow region respectively. They can bloat the stack
|
||||
* significantly, so double the (minimum) stack size when they are in use.
|
||||
*/
|
||||
#ifdef CONFIG_KASAN
|
||||
#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3))
|
||||
#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
|
||||
#define KASAN_THREAD_SHIFT 1
|
||||
#else
|
||||
#define KASAN_SHADOW_SIZE (0)
|
||||
@@ -210,6 +210,26 @@ static inline unsigned long kaslr_offset(void)
|
||||
*/
|
||||
#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* When dealing with data aborts, watchpoints, or instruction traps we may end
|
||||
* up with a tagged userland pointer. Clear the tag to get a sane pointer to
|
||||
* pass on to access_ok(), for instance.
|
||||
*/
|
||||
#define untagged_addr(addr) \
|
||||
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
#define __tag_shifted(tag) ((u64)(tag) << 56)
|
||||
#define __tag_set(addr, tag) (__typeof__(addr))( \
|
||||
((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag))
|
||||
#define __tag_reset(addr) untagged_addr(addr)
|
||||
#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
|
||||
#else
|
||||
#define __tag_set(addr, tag) (addr)
|
||||
#define __tag_reset(addr) (addr)
|
||||
#define __tag_get(addr) 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Physical vs virtual RAM address space conversion. These are
|
||||
* private definitions which should NOT be used outside memory.h
|
||||
@@ -293,7 +313,13 @@ static inline void *phys_to_virt(phys_addr_t x)
|
||||
#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
|
||||
#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
|
||||
|
||||
#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
|
||||
#define page_to_virt(page) ({ \
|
||||
unsigned long __addr = \
|
||||
((__page_to_voff(page)) | PAGE_OFFSET); \
|
||||
__addr = __tag_set(__addr, page_kasan_tag(page)); \
|
||||
((void *)__addr); \
|
||||
})
|
||||
|
||||
#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
|
||||
|
||||
#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
|
||||
@@ -301,9 +327,10 @@ static inline void *phys_to_virt(phys_addr_t x)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET)
|
||||
#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \
|
||||
_virt_addr_valid(kaddr))
|
||||
#define _virt_addr_is_linear(kaddr) \
|
||||
(__tag_reset((u64)(kaddr)) >= PAGE_OFFSET)
|
||||
#define virt_addr_valid(kaddr) \
|
||||
(_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr))
|
||||
|
||||
#include <asm-generic/memory_model.h>
|
||||
|
||||
|
||||
@@ -275,6 +275,7 @@
|
||||
#define TCR_A1 (UL(1) << 22)
|
||||
#define TCR_ASID16 (UL(1) << 36)
|
||||
#define TCR_TBI0 (UL(1) << 37)
|
||||
#define TCR_TBI1 (UL(1) << 38)
|
||||
#define TCR_HA (UL(1) << 39)
|
||||
#define TCR_HD (UL(1) << 40)
|
||||
|
||||
|
||||
@@ -242,5 +242,13 @@ static inline void spin_lock_prefetch(const void *ptr)
|
||||
int cpu_enable_pan(void *__unused);
|
||||
int cpu_enable_cache_maint_trap(void *__unused);
|
||||
|
||||
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
|
||||
/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
|
||||
long set_tagged_addr_ctrl(unsigned long arg);
|
||||
long get_tagged_addr_ctrl(void);
|
||||
#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg)
|
||||
#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl()
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __ASM_PROCESSOR_H */
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#ifndef __ASM_STRING_H
|
||||
#define __ASM_STRING_H
|
||||
|
||||
#ifndef CONFIG_KASAN
|
||||
#define __HAVE_ARCH_STRRCHR
|
||||
extern char *strrchr(const char *, int c);
|
||||
|
||||
@@ -34,6 +35,13 @@ extern __kernel_size_t strlen(const char *);
|
||||
#define __HAVE_ARCH_STRNLEN
|
||||
extern __kernel_size_t strnlen(const char *, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCMP
|
||||
extern int memcmp(const void *, const void *, size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCHR
|
||||
extern void *memchr(const void *, int, __kernel_size_t);
|
||||
#endif
|
||||
|
||||
#define __HAVE_ARCH_MEMCPY
|
||||
extern void *memcpy(void *, const void *, __kernel_size_t);
|
||||
extern void *__memcpy(void *, const void *, __kernel_size_t);
|
||||
@@ -42,16 +50,10 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
|
||||
extern void *memmove(void *, const void *, __kernel_size_t);
|
||||
extern void *__memmove(void *, const void *, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCHR
|
||||
extern void *memchr(const void *, int, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMSET
|
||||
extern void *memset(void *, int, __kernel_size_t);
|
||||
extern void *__memset(void *, int, __kernel_size_t);
|
||||
|
||||
#define __HAVE_ARCH_MEMCMP
|
||||
extern int memcmp(const void *, const void *, size_t);
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
|
||||
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
|
||||
void memcpy_flushcache(void *dst, const void *src, size_t cnt);
|
||||
|
||||
@@ -96,6 +96,7 @@ void arch_setup_new_exec(void);
|
||||
#define TIF_SINGLESTEP 21
|
||||
#define TIF_32BIT 22 /* 32bit process */
|
||||
#define TIF_SSBD 23 /* Wants SSB mitigation */
|
||||
#define TIF_TAGGED_ADDR 24 /* Allow tagged user addresses */
|
||||
|
||||
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
||||
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
||||
|
||||
@@ -76,6 +76,10 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
|
||||
{
|
||||
unsigned long limit = current_thread_info()->addr_limit;
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
|
||||
test_thread_flag(TIF_TAGGED_ADDR))
|
||||
addr = untagged_addr(addr);
|
||||
|
||||
__chk_user_ptr(addr);
|
||||
asm volatile(
|
||||
// A + B <= C + 1 for all A,B,C, in four easy steps:
|
||||
@@ -97,13 +101,6 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
|
||||
return addr;
|
||||
}
|
||||
|
||||
/*
|
||||
* When dealing with data aborts, watchpoints, or instruction traps we may end
|
||||
* up with a tagged userland pointer. Clear the tag to get a sane pointer to
|
||||
* pass on to access_ok(), for instance.
|
||||
*/
|
||||
#define untagged_addr(addr) sign_extend64(addr, 55)
|
||||
|
||||
#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
|
||||
#define user_addr_max get_fs
|
||||
|
||||
@@ -224,7 +221,8 @@ static inline void uaccess_enable_not_uao(void)
|
||||
|
||||
/*
|
||||
* Sanitise a uaccess pointer such that it becomes NULL if above the
|
||||
* current addr_limit.
|
||||
* current addr_limit. In case the pointer is tagged (has the top byte set),
|
||||
* untag the pointer before checking.
|
||||
*/
|
||||
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
|
||||
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
|
||||
@@ -232,10 +230,11 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
|
||||
void __user *safe_ptr;
|
||||
|
||||
asm volatile(
|
||||
" bics xzr, %1, %2\n"
|
||||
" bics xzr, %3, %2\n"
|
||||
" csel %0, %1, xzr, eq\n"
|
||||
: "=&r" (safe_ptr)
|
||||
: "r" (ptr), "r" (current_thread_info()->addr_limit)
|
||||
: "r" (ptr), "r" (current_thread_info()->addr_limit),
|
||||
"r" (untagged_addr(ptr))
|
||||
: "cc");
|
||||
|
||||
csdb();
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
|
||||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
|
||||
|
||||
#define __NR_compat_syscalls 398
|
||||
#define __NR_compat_syscalls 435
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
|
||||
@@ -817,6 +817,10 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
|
||||
__SYSCALL(__NR_pkey_free, sys_pkey_free)
|
||||
#define __NR_statx 397
|
||||
__SYSCALL(__NR_statx, sys_statx)
|
||||
#define __NR_pidfd_send_signal 424
|
||||
__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
|
||||
#define __NR_pidfd_open 434
|
||||
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
|
||||
|
||||
/*
|
||||
* Please add new compat syscalls above this comment and update
|
||||
|
||||
@@ -44,20 +44,23 @@ EXPORT_SYMBOL(__arch_copy_in_user);
|
||||
EXPORT_SYMBOL(memstart_addr);
|
||||
|
||||
/* string / mem functions */
|
||||
#ifndef CONFIG_KASAN
|
||||
EXPORT_SYMBOL(strchr);
|
||||
EXPORT_SYMBOL(strrchr);
|
||||
EXPORT_SYMBOL(strcmp);
|
||||
EXPORT_SYMBOL(strncmp);
|
||||
EXPORT_SYMBOL(strlen);
|
||||
EXPORT_SYMBOL(strnlen);
|
||||
EXPORT_SYMBOL(memcmp);
|
||||
EXPORT_SYMBOL(memchr);
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(memset);
|
||||
EXPORT_SYMBOL(memcpy);
|
||||
EXPORT_SYMBOL(memmove);
|
||||
EXPORT_SYMBOL(__memset);
|
||||
EXPORT_SYMBOL(__memcpy);
|
||||
EXPORT_SYMBOL(__memmove);
|
||||
EXPORT_SYMBOL(memchr);
|
||||
EXPORT_SYMBOL(memcmp);
|
||||
|
||||
/* atomic bitops */
|
||||
EXPORT_SYMBOL(set_bit);
|
||||
|
||||
@@ -855,8 +855,16 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
|
||||
return true;
|
||||
|
||||
/* Don't force KPTI for CPUs that are not vulnerable */
|
||||
if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
|
||||
switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) {
|
||||
case MIDR_CAVIUM_THUNDERX2:
|
||||
case MIDR_BRCM_VULCAN:
|
||||
case MIDR_CORTEX_A53:
|
||||
case MIDR_CORTEX_A55:
|
||||
case MIDR_CORTEX_A57:
|
||||
case MIDR_CORTEX_A72:
|
||||
case MIDR_CORTEX_A73:
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Defer to CPU feature registers */
|
||||
return !cpuid_feature_extract_unsigned_field(pfr0,
|
||||
|
||||
@@ -112,6 +112,8 @@ pe_header:
|
||||
* x23 stext() .. start_kernel() physical misalignment/KASLR offset
|
||||
* x28 __create_page_tables() callee preserved temp register
|
||||
* x19/x20 __primary_switch() callee preserved temp registers
|
||||
* x24 __primary_switch() .. relocate_kernel()
|
||||
* current RELR displacement
|
||||
*/
|
||||
ENTRY(stext)
|
||||
bl preserve_boot_args
|
||||
@@ -700,14 +702,93 @@ __relocate_kernel:
|
||||
|
||||
0: cmp x9, x10
|
||||
b.hs 1f
|
||||
ldp x11, x12, [x9], #24
|
||||
ldr x13, [x9, #-8]
|
||||
cmp w12, #R_AARCH64_RELATIVE
|
||||
ldp x12, x13, [x9], #24
|
||||
ldr x14, [x9, #-8]
|
||||
cmp w13, #R_AARCH64_RELATIVE
|
||||
b.ne 0b
|
||||
add x13, x13, x23 // relocate
|
||||
str x13, [x11, x23]
|
||||
add x14, x14, x23 // relocate
|
||||
str x14, [x12, x23]
|
||||
b 0b
|
||||
1: ret
|
||||
|
||||
1:
|
||||
#ifdef CONFIG_RELR
|
||||
/*
|
||||
* Apply RELR relocations.
|
||||
*
|
||||
* RELR is a compressed format for storing relative relocations. The
|
||||
* encoded sequence of entries looks like:
|
||||
* [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
|
||||
*
|
||||
* i.e. start with an address, followed by any number of bitmaps. The
|
||||
* address entry encodes 1 relocation. The subsequent bitmap entries
|
||||
* encode up to 63 relocations each, at subsequent offsets following
|
||||
* the last address entry.
|
||||
*
|
||||
* The bitmap entries must have 1 in the least significant bit. The
|
||||
* assumption here is that an address cannot have 1 in lsb. Odd
|
||||
* addresses are not supported. Any odd addresses are stored in the RELA
|
||||
* section, which is handled above.
|
||||
*
|
||||
* Excluding the least significant bit in the bitmap, each non-zero
|
||||
* bit in the bitmap represents a relocation to be applied to
|
||||
* a corresponding machine word that follows the base address
|
||||
* word. The second least significant bit represents the machine
|
||||
* word immediately following the initial address, and each bit
|
||||
* that follows represents the next word, in linear order. As such,
|
||||
* a single bitmap can encode up to 63 relocations in a 64-bit object.
|
||||
*
|
||||
* In this implementation we store the address of the next RELR table
|
||||
* entry in x9, the address being relocated by the current address or
|
||||
* bitmap entry in x13 and the address being relocated by the current
|
||||
* bit in x14.
|
||||
*
|
||||
* Because addends are stored in place in the binary, RELR relocations
|
||||
* cannot be applied idempotently. We use x24 to keep track of the
|
||||
* currently applied displacement so that we can correctly relocate if
|
||||
* __relocate_kernel is called twice with non-zero displacements (i.e.
|
||||
* if there is both a physical misalignment and a KASLR displacement).
|
||||
*/
|
||||
ldr w9, =__relr_offset // offset to reloc table
|
||||
ldr w10, =__relr_size // size of reloc table
|
||||
add x9, x9, x11 // __va(.relr)
|
||||
add x10, x9, x10 // __va(.relr) + sizeof(.relr)
|
||||
|
||||
sub x15, x23, x24 // delta from previous offset
|
||||
cbz x15, 7f // nothing to do if unchanged
|
||||
mov x24, x23 // save new offset
|
||||
|
||||
2: cmp x9, x10
|
||||
b.hs 7f
|
||||
ldr x11, [x9], #8
|
||||
tbnz x11, #0, 3f // branch to handle bitmaps
|
||||
add x13, x11, x23
|
||||
ldr x12, [x13] // relocate address entry
|
||||
add x12, x12, x15
|
||||
str x12, [x13], #8 // adjust to start of bitmap
|
||||
b 2b
|
||||
|
||||
3: mov x14, x13
|
||||
4: lsr x11, x11, #1
|
||||
cbz x11, 6f
|
||||
tbz x11, #0, 5f // skip bit if not set
|
||||
ldr x12, [x14] // relocate bit
|
||||
add x12, x12, x15
|
||||
str x12, [x14]
|
||||
|
||||
5: add x14, x14, #8 // move to next bit's address
|
||||
b 4b
|
||||
|
||||
6: /*
|
||||
* Move to the next bitmap's address. 8 is the word size, and 63 is the
|
||||
* number of significant bits in a bitmap entry.
|
||||
*/
|
||||
add x13, x13, #(8 * 63)
|
||||
b 2b
|
||||
|
||||
7:
|
||||
#endif
|
||||
ret
|
||||
|
||||
ENDPROC(__relocate_kernel)
|
||||
#endif
|
||||
|
||||
@@ -719,6 +800,9 @@ __primary_switch:
|
||||
|
||||
bl __enable_mmu
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
#ifdef CONFIG_RELR
|
||||
mov x24, #0 // no RELR displacement yet
|
||||
#endif
|
||||
bl __relocate_kernel
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
ldr x8, =__primary_switched
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/user.h>
|
||||
#include <linux/delay.h>
|
||||
@@ -49,6 +50,7 @@
|
||||
#include <linux/notifier.h>
|
||||
#include <trace/events/power.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/prctl.h>
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/compat.h>
|
||||
@@ -316,11 +318,18 @@ static void tls_thread_flush(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void flush_tagged_addr_state(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI))
|
||||
clear_thread_flag(TIF_TAGGED_ADDR);
|
||||
}
|
||||
|
||||
void flush_thread(void)
|
||||
{
|
||||
fpsimd_flush_thread();
|
||||
tls_thread_flush();
|
||||
flush_ptrace_hw_breakpoint(current);
|
||||
flush_tagged_addr_state();
|
||||
}
|
||||
|
||||
void release_thread(struct task_struct *dead_task)
|
||||
@@ -545,3 +554,70 @@ void arch_setup_new_exec(void)
|
||||
{
|
||||
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
|
||||
/*
|
||||
* Control the relaxed ABI allowing tagged user addresses into the kernel.
|
||||
*/
|
||||
static unsigned int tagged_addr_disabled;
|
||||
|
||||
long set_tagged_addr_ctrl(unsigned long arg)
|
||||
{
|
||||
if (is_compat_task())
|
||||
return -EINVAL;
|
||||
if (arg & ~PR_TAGGED_ADDR_ENABLE)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Do not allow the enabling of the tagged address ABI if globally
|
||||
* disabled via sysctl abi.tagged_addr_disabled.
|
||||
*/
|
||||
if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
|
||||
return -EINVAL;
|
||||
|
||||
update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long get_tagged_addr_ctrl(void)
|
||||
{
|
||||
if (is_compat_task())
|
||||
return -EINVAL;
|
||||
|
||||
if (test_thread_flag(TIF_TAGGED_ADDR))
|
||||
return PR_TAGGED_ADDR_ENABLE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Global sysctl to disable the tagged user addresses support. This control
|
||||
* only prevents the tagged address ABI enabling via prctl() and does not
|
||||
* disable it for tasks that already opted in to the relaxed ABI.
|
||||
*/
|
||||
static int zero;
|
||||
static int one = 1;
|
||||
|
||||
static struct ctl_table tagged_addr_sysctl_table[] = {
|
||||
{
|
||||
.procname = "tagged_addr_disabled",
|
||||
.mode = 0644,
|
||||
.data = &tagged_addr_disabled,
|
||||
.maxlen = sizeof(int),
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
static int __init tagged_addr_init(void)
|
||||
{
|
||||
if (!register_sysctl("abi", tagged_addr_sysctl_table))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
core_initcall(tagged_addr_init);
|
||||
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
|
||||
|
||||
@@ -261,6 +261,11 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
setup_machine_fdt(__fdt_pointer);
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code and early parameters.
|
||||
*/
|
||||
jump_label_init();
|
||||
parse_early_param();
|
||||
|
||||
/*
|
||||
@@ -306,6 +311,9 @@ void __init setup_arch(char **cmdline_p)
|
||||
smp_init_cpus();
|
||||
smp_build_mpidr_hash();
|
||||
|
||||
/* Init percpu seeds for random tags after cpus are set up. */
|
||||
kasan_init_tags();
|
||||
|
||||
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
|
||||
/*
|
||||
* Make sure init_thread_info.ttbr0 always generates translation
|
||||
|
||||
@@ -457,11 +457,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
|
||||
void __init smp_prepare_boot_cpu(void)
|
||||
{
|
||||
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code.
|
||||
*/
|
||||
jump_label_init();
|
||||
cpuinfo_store_boot_cpu();
|
||||
save_boot_cpu_run_el();
|
||||
/*
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/kasan.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/bug.h>
|
||||
@@ -839,6 +840,58 @@ static struct break_hook bug_break_hook = {
|
||||
.fn = bug_handler,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
|
||||
#define KASAN_ESR_RECOVER 0x20
|
||||
#define KASAN_ESR_WRITE 0x10
|
||||
#define KASAN_ESR_SIZE_MASK 0x0f
|
||||
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
|
||||
|
||||
static int kasan_handler(struct pt_regs *regs, unsigned int esr)
|
||||
{
|
||||
bool recover = esr & KASAN_ESR_RECOVER;
|
||||
bool write = esr & KASAN_ESR_WRITE;
|
||||
size_t size = KASAN_ESR_SIZE(esr);
|
||||
u64 addr = regs->regs[0];
|
||||
u64 pc = regs->pc;
|
||||
|
||||
if (user_mode(regs))
|
||||
return DBG_HOOK_ERROR;
|
||||
|
||||
kasan_report(addr, size, write, pc);
|
||||
|
||||
/*
|
||||
* The instrumentation allows to control whether we can proceed after
|
||||
* a crash was detected. This is done by passing the -recover flag to
|
||||
* the compiler. Disabling recovery allows to generate more compact
|
||||
* code.
|
||||
*
|
||||
* Unfortunately disabling recovery doesn't work for the kernel right
|
||||
* now. KASAN reporting is disabled in some contexts (for example when
|
||||
* the allocator accesses slab object metadata; this is controlled by
|
||||
* current->kasan_depth). All these accesses are detected by the tool,
|
||||
* even though the reports for them are not printed.
|
||||
*
|
||||
* This is something that might be fixed at some point in the future.
|
||||
*/
|
||||
if (!recover)
|
||||
die("Oops - KASAN", regs, 0);
|
||||
|
||||
/* If thread survives, skip over the brk instruction and continue: */
|
||||
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
|
||||
return DBG_HOOK_HANDLED;
|
||||
}
|
||||
|
||||
#define KASAN_ESR_VAL (0xf2000000 | KASAN_BRK_IMM)
|
||||
#define KASAN_ESR_MASK 0xffffff00
|
||||
|
||||
static struct break_hook kasan_break_hook = {
|
||||
.esr_val = KASAN_ESR_VAL,
|
||||
.esr_mask = KASAN_ESR_MASK,
|
||||
.fn = kasan_handler,
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Initial handler for AArch64 BRK exceptions
|
||||
* This handler only used until debug_traps_init().
|
||||
@@ -846,6 +899,10 @@ static struct break_hook bug_break_hook = {
|
||||
int __init early_brk64(unsigned long addr, unsigned int esr,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
if ((esr & KASAN_ESR_MASK) == KASAN_ESR_VAL)
|
||||
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
|
||||
#endif
|
||||
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
|
||||
}
|
||||
|
||||
@@ -853,4 +910,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
|
||||
void __init trap_init(void)
|
||||
{
|
||||
register_break_hook(&bug_break_hook);
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
register_break_hook(&kasan_break_hook);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -184,6 +184,15 @@ SECTIONS
|
||||
__rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
|
||||
__rela_size = SIZEOF(.rela.dyn);
|
||||
|
||||
#ifdef CONFIG_RELR
|
||||
.relr.dyn : ALIGN(8) {
|
||||
*(.relr.dyn)
|
||||
}
|
||||
|
||||
__relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
|
||||
__relr_size = SIZEOF(.relr.dyn);
|
||||
#endif
|
||||
|
||||
. = ALIGN(SEGMENT_ALIGN);
|
||||
__initdata_end = .;
|
||||
__init_end = .;
|
||||
|
||||
@@ -22,7 +22,6 @@ config KVM
|
||||
depends on OF
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
* Returns:
|
||||
* x0 - address of first occurrence of 'c' or 0
|
||||
*/
|
||||
ENTRY(memchr)
|
||||
WEAK(memchr)
|
||||
and w1, w1, #0xff
|
||||
1: subs x2, x2, #1
|
||||
b.mi 2f
|
||||
|
||||
@@ -58,7 +58,7 @@ pos .req x11
|
||||
limit_wd .req x12
|
||||
mask .req x13
|
||||
|
||||
ENTRY(memcmp)
|
||||
WEAK(memcmp)
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
tst tmp1, #7
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
* Returns:
|
||||
* x0 - address of first occurrence of 'c' or 0
|
||||
*/
|
||||
ENTRY(strchr)
|
||||
WEAK(strchr)
|
||||
and w1, w1, #0xff
|
||||
1: ldrb w2, [x0], #1
|
||||
cmp w2, w1
|
||||
|
||||
@@ -60,7 +60,7 @@ tmp3 .req x9
|
||||
zeroones .req x10
|
||||
pos .req x11
|
||||
|
||||
ENTRY(strcmp)
|
||||
WEAK(strcmp)
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
tst tmp1, #7
|
||||
|
||||
@@ -56,7 +56,7 @@ pos .req x12
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
ENTRY(strlen)
|
||||
WEAK(strlen)
|
||||
mov zeroones, #REP8_01
|
||||
bic src, srcin, #15
|
||||
ands tmp1, srcin, #15
|
||||
|
||||
@@ -64,7 +64,7 @@ limit_wd .req x13
|
||||
mask .req x14
|
||||
endloop .req x15
|
||||
|
||||
ENTRY(strncmp)
|
||||
WEAK(strncmp)
|
||||
cbz limit, .Lret0
|
||||
eor tmp1, src1, src2
|
||||
mov zeroones, #REP8_01
|
||||
|
||||
@@ -59,7 +59,7 @@ limit_wd .req x14
|
||||
#define REP8_7f 0x7f7f7f7f7f7f7f7f
|
||||
#define REP8_80 0x8080808080808080
|
||||
|
||||
ENTRY(strnlen)
|
||||
WEAK(strnlen)
|
||||
cbz limit, .Lhit_limit
|
||||
mov zeroones, #REP8_01
|
||||
bic src, srcin, #15
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
* Returns:
|
||||
* x0 - address of last occurrence of 'c' or 0
|
||||
*/
|
||||
ENTRY(strrchr)
|
||||
WEAK(strrchr)
|
||||
mov x3, #0
|
||||
and w1, w1, #0xff
|
||||
1: ldrb w2, [x0], #1
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <asm/exception.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/esr.h>
|
||||
#include <asm/kasan.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/system_misc.h>
|
||||
#include <asm/pgtable.h>
|
||||
@@ -126,6 +127,18 @@ static void mem_abort_decode(unsigned int esr)
|
||||
data_abort_decode(esr);
|
||||
}
|
||||
|
||||
static inline bool is_ttbr0_addr(unsigned long addr)
|
||||
{
|
||||
/* entry assembly clears tags for TTBR0 addrs */
|
||||
return addr < TASK_SIZE;
|
||||
}
|
||||
|
||||
static inline bool is_ttbr1_addr(unsigned long addr)
|
||||
{
|
||||
/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
|
||||
return arch_kasan_reset_tag(addr) >= VA_START;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump out the page tables associated with 'addr' in the currently active mm.
|
||||
*/
|
||||
@@ -134,7 +147,7 @@ void show_pte(unsigned long addr)
|
||||
struct mm_struct *mm;
|
||||
pgd_t *pgd;
|
||||
|
||||
if (addr < TASK_SIZE) {
|
||||
if (is_ttbr0_addr(addr)) {
|
||||
/* TTBR0 */
|
||||
mm = current->active_mm;
|
||||
if (mm == &init_mm) {
|
||||
@@ -142,7 +155,7 @@ void show_pte(unsigned long addr)
|
||||
addr);
|
||||
return;
|
||||
}
|
||||
} else if (addr >= VA_START) {
|
||||
} else if (is_ttbr1_addr(addr)) {
|
||||
/* TTBR1 */
|
||||
mm = &init_mm;
|
||||
} else {
|
||||
@@ -242,7 +255,7 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
|
||||
if (fsc_type == ESR_ELx_FSC_PERM)
|
||||
return true;
|
||||
|
||||
if (addr < TASK_SIZE && system_uses_ttbr0_pan())
|
||||
if (is_ttbr0_addr(addr) && system_uses_ttbr0_pan())
|
||||
return fsc_type == ESR_ELx_FSC_FAULT &&
|
||||
(regs->pstate & PSR_PAN_BIT);
|
||||
|
||||
@@ -426,7 +439,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
|
||||
mm_flags |= FAULT_FLAG_WRITE;
|
||||
}
|
||||
|
||||
if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) {
|
||||
if (is_ttbr0_addr(addr) && is_permission_fault(esr, regs, addr)) {
|
||||
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
|
||||
if (regs->orig_addr_limit == KERNEL_DS)
|
||||
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
|
||||
@@ -590,7 +603,7 @@ static int __kprobes do_translation_fault(unsigned long addr,
|
||||
unsigned int esr,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (addr < TASK_SIZE)
|
||||
if (is_ttbr0_addr(addr))
|
||||
return do_page_fault(addr, esr, regs);
|
||||
|
||||
do_bad_area(addr, esr, regs);
|
||||
@@ -776,7 +789,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
|
||||
* re-enabled IRQs. If the address is a kernel address, apply
|
||||
* BP hardening prior to enabling IRQs and pre-emption.
|
||||
*/
|
||||
if (addr > TASK_SIZE)
|
||||
if (!is_ttbr0_addr(addr))
|
||||
arm64_apply_bp_hardening();
|
||||
|
||||
local_irq_enable();
|
||||
@@ -795,7 +808,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (user_mode(regs)) {
|
||||
if (instruction_pointer(regs) > TASK_SIZE)
|
||||
if (!is_ttbr0_addr(instruction_pointer(regs)))
|
||||
arm64_apply_bp_hardening();
|
||||
local_irq_enable();
|
||||
}
|
||||
@@ -863,6 +876,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
|
||||
unsigned long pc = instruction_pointer(regs);
|
||||
struct siginfo info;
|
||||
int rv;
|
||||
|
||||
@@ -873,7 +887,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
|
||||
if (interrupts_enabled(regs))
|
||||
trace_hardirqs_off();
|
||||
|
||||
if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
|
||||
if (user_mode(regs) && !is_ttbr0_addr(pc))
|
||||
arm64_apply_bp_hardening();
|
||||
|
||||
if (!inf->fn(addr, esr, regs)) {
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "kasan: " fmt
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched/task.h>
|
||||
@@ -35,77 +36,124 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
|
||||
* with the physical address from __pa_symbol.
|
||||
*/
|
||||
|
||||
static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
|
||||
unsigned long end)
|
||||
static phys_addr_t __init kasan_alloc_zeroed_page(int node)
|
||||
{
|
||||
void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
|
||||
__pa(MAX_DMA_ADDRESS),
|
||||
MEMBLOCK_ALLOC_KASAN, node);
|
||||
return __pa(p);
|
||||
}
|
||||
|
||||
static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
|
||||
bool early)
|
||||
{
|
||||
if (pmd_none(*pmd)) {
|
||||
phys_addr_t pte_phys = early ?
|
||||
__pa_symbol(kasan_early_shadow_pte)
|
||||
: kasan_alloc_zeroed_page(node);
|
||||
__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
|
||||
}
|
||||
|
||||
return early ? pte_offset_kimg(pmd, addr)
|
||||
: pte_offset_kernel(pmd, addr);
|
||||
}
|
||||
|
||||
static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
|
||||
bool early)
|
||||
{
|
||||
if (pud_none(*pud)) {
|
||||
phys_addr_t pmd_phys = early ?
|
||||
__pa_symbol(kasan_early_shadow_pmd)
|
||||
: kasan_alloc_zeroed_page(node);
|
||||
__pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
|
||||
}
|
||||
|
||||
return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
|
||||
}
|
||||
|
||||
static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
|
||||
bool early)
|
||||
{
|
||||
if (pgd_none(*pgd)) {
|
||||
phys_addr_t pud_phys = early ?
|
||||
__pa_symbol(kasan_early_shadow_pud)
|
||||
: kasan_alloc_zeroed_page(node);
|
||||
__pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
|
||||
}
|
||||
|
||||
return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
|
||||
}
|
||||
|
||||
static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
|
||||
unsigned long end, int node, bool early)
|
||||
{
|
||||
pte_t *pte;
|
||||
unsigned long next;
|
||||
pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
|
||||
|
||||
if (pmd_none(*pmd))
|
||||
__pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE);
|
||||
|
||||
pte = pte_offset_kimg(pmd, addr);
|
||||
do {
|
||||
phys_addr_t page_phys = early ?
|
||||
__pa_symbol(kasan_early_shadow_page)
|
||||
: kasan_alloc_zeroed_page(node);
|
||||
if (!early)
|
||||
memset(__va(page_phys), KASAN_SHADOW_INIT, PAGE_SIZE);
|
||||
next = addr + PAGE_SIZE;
|
||||
set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page),
|
||||
PAGE_KERNEL));
|
||||
set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
|
||||
} while (pte++, addr = next, addr != end && pte_none(*pte));
|
||||
}
|
||||
|
||||
static void __init kasan_early_pmd_populate(pud_t *pud,
|
||||
unsigned long addr,
|
||||
unsigned long end)
|
||||
static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
|
||||
unsigned long end, int node, bool early)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
unsigned long next;
|
||||
pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
|
||||
|
||||
if (pud_none(*pud))
|
||||
__pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE);
|
||||
|
||||
pmd = pmd_offset_kimg(pud, addr);
|
||||
do {
|
||||
next = pmd_addr_end(addr, end);
|
||||
kasan_early_pte_populate(pmd, addr, next);
|
||||
kasan_pte_populate(pmd, addr, next, node, early);
|
||||
} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
|
||||
}
|
||||
|
||||
static void __init kasan_early_pud_populate(pgd_t *pgd,
|
||||
unsigned long addr,
|
||||
unsigned long end)
|
||||
static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
|
||||
unsigned long end, int node, bool early)
|
||||
{
|
||||
pud_t *pud;
|
||||
unsigned long next;
|
||||
pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
|
||||
|
||||
if (pgd_none(*pgd))
|
||||
__pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE);
|
||||
|
||||
pud = pud_offset_kimg(pgd, addr);
|
||||
do {
|
||||
next = pud_addr_end(addr, end);
|
||||
kasan_early_pmd_populate(pud, addr, next);
|
||||
kasan_pmd_populate(pud, addr, next, node, early);
|
||||
} while (pud++, addr = next, addr != end && pud_none(*pud));
|
||||
}
|
||||
|
||||
static void __init kasan_map_early_shadow(void)
|
||||
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
|
||||
int node, bool early)
|
||||
{
|
||||
unsigned long addr = KASAN_SHADOW_START;
|
||||
unsigned long end = KASAN_SHADOW_END;
|
||||
unsigned long next;
|
||||
pgd_t *pgd;
|
||||
|
||||
pgd = pgd_offset_k(addr);
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
kasan_early_pud_populate(pgd, addr, next);
|
||||
kasan_pud_populate(pgd, addr, next, node, early);
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
/* The early shadow maps everything to a single page of zeroes */
|
||||
asmlinkage void __init kasan_early_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
|
||||
BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
|
||||
KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
|
||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
|
||||
kasan_map_early_shadow();
|
||||
kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
|
||||
true);
|
||||
}
|
||||
|
||||
/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
|
||||
static void __init kasan_map_populate(unsigned long start, unsigned long end,
|
||||
int node)
|
||||
{
|
||||
kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -142,15 +190,15 @@ void __init kasan_init(void)
|
||||
struct memblock_region *reg;
|
||||
int i;
|
||||
|
||||
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
|
||||
kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
|
||||
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
|
||||
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
|
||||
|
||||
mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
|
||||
mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
|
||||
|
||||
/*
|
||||
* We are going to perform proper setup of shadow memory.
|
||||
* At first we should unmap early shadow (clear_pgds() call bellow).
|
||||
* At first we should unmap early shadow (clear_pgds() call below).
|
||||
* However, instrumented code couldn't execute without shadow memory.
|
||||
* tmp_pg_dir used to keep early shadow mapped until full shadow
|
||||
* setup will be finished.
|
||||
@@ -161,28 +209,17 @@ void __init kasan_init(void)
|
||||
|
||||
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||
|
||||
vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
|
||||
pfn_to_nid(virt_to_pfn(lm_alias(_text))));
|
||||
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
|
||||
early_pfn_to_nid(virt_to_pfn(lm_alias(_text))));
|
||||
|
||||
/*
|
||||
* vmemmap_populate() has populated the shadow region that covers the
|
||||
* kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
|
||||
* the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
|
||||
* kasan_populate_zero_shadow() from replacing the page table entries
|
||||
* (PMD or PTE) at the edges of the shadow region for the kernel
|
||||
* image.
|
||||
*/
|
||||
kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
|
||||
kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
|
||||
|
||||
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
|
||||
(void *)mod_shadow_start);
|
||||
kasan_populate_zero_shadow((void *)kimg_shadow_end,
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET));
|
||||
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
|
||||
(void *)mod_shadow_start);
|
||||
kasan_populate_early_shadow((void *)kimg_shadow_end,
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET));
|
||||
|
||||
if (kimg_shadow_start > mod_shadow_end)
|
||||
kasan_populate_zero_shadow((void *)mod_shadow_end,
|
||||
(void *)kimg_shadow_start);
|
||||
kasan_populate_early_shadow((void *)mod_shadow_end,
|
||||
(void *)kimg_shadow_start);
|
||||
|
||||
for_each_memblock(memory, reg) {
|
||||
void *start = (void *)__phys_to_virt(reg->base);
|
||||
@@ -191,20 +228,21 @@ void __init kasan_init(void)
|
||||
if (start >= end)
|
||||
break;
|
||||
|
||||
vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
|
||||
(unsigned long)kasan_mem_to_shadow(end),
|
||||
pfn_to_nid(virt_to_pfn(start)));
|
||||
kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
|
||||
(unsigned long)kasan_mem_to_shadow(end),
|
||||
early_pfn_to_nid(virt_to_pfn(start)));
|
||||
}
|
||||
|
||||
/*
|
||||
* KAsan may reuse the contents of kasan_zero_pte directly, so we
|
||||
* should make sure that it maps the zero page read-only.
|
||||
* KAsan may reuse the contents of kasan_early_shadow_pte directly,
|
||||
* so we should make sure that it maps the zero page read-only.
|
||||
*/
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
set_pte(&kasan_zero_pte[i],
|
||||
pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
|
||||
set_pte(&kasan_early_shadow_pte[i],
|
||||
pfn_pte(sym_to_pfn(kasan_early_shadow_page),
|
||||
PAGE_KERNEL_RO));
|
||||
|
||||
memset(kasan_zero_page, 0, PAGE_SIZE);
|
||||
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
|
||||
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
|
||||
|
||||
/* At this point kasan is fully initialized. Enable error messages */
|
||||
|
||||
@@ -41,6 +41,12 @@
|
||||
/* PTWs cacheable, inner/outer WBWA */
|
||||
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
#define TCR_KASAN_FLAGS TCR_TBI1
|
||||
#else
|
||||
#define TCR_KASAN_FLAGS 0
|
||||
#endif
|
||||
|
||||
#define MAIR(attr, mt) ((attr) << ((mt) * 8))
|
||||
|
||||
/*
|
||||
@@ -280,6 +286,15 @@ skip_pgd:
|
||||
msr sctlr_el1, x18
|
||||
isb
|
||||
|
||||
/*
|
||||
* Invalidate the local I-cache so that any instructions fetched
|
||||
* speculatively from the PoC are discarded, since they may have
|
||||
* been dynamically patched at the PoU.
|
||||
*/
|
||||
ic iallu
|
||||
dsb nsh
|
||||
isb
|
||||
|
||||
/* Set the flag to zero to indicate that we're all done */
|
||||
str wzr, [flag_ptr]
|
||||
ret
|
||||
@@ -431,7 +446,8 @@ ENTRY(__cpu_setup)
|
||||
* both user and kernel.
|
||||
*/
|
||||
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
|
||||
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
|
||||
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 | \
|
||||
TCR_KASAN_FLAGS
|
||||
tcr_set_idmap_t0sz x10, x9
|
||||
|
||||
/*
|
||||
|
||||
@@ -943,3 +943,25 @@ out:
|
||||
tmp : orig_prog);
|
||||
return prog;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CFI_CLANG
|
||||
bool arch_bpf_jit_check_func(const struct bpf_prog *prog)
|
||||
{
|
||||
const uintptr_t func = (const uintptr_t)prog->bpf_func;
|
||||
|
||||
/*
|
||||
* bpf_func must be correctly aligned and within the correct region.
|
||||
* module_alloc places JIT code in the module region, unless
|
||||
* ARM64_MODULE_PLTS is enabled, in which case we might end up using
|
||||
* the vmalloc region too.
|
||||
*/
|
||||
if (unlikely(!IS_ALIGNED(func, sizeof(u32))))
|
||||
return false;
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
|
||||
is_vmalloc_addr(prog->bpf_func))
|
||||
return true;
|
||||
|
||||
return (func >= MODULES_VADDR && func < MODULES_END);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -913,8 +913,12 @@ module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mo
|
||||
void
|
||||
module_arch_cleanup (struct module *mod)
|
||||
{
|
||||
if (mod->arch.init_unw_table)
|
||||
if (mod->arch.init_unw_table) {
|
||||
unw_remove_unwind_table(mod->arch.init_unw_table);
|
||||
if (mod->arch.core_unw_table)
|
||||
mod->arch.init_unw_table = NULL;
|
||||
}
|
||||
if (mod->arch.core_unw_table) {
|
||||
unw_remove_unwind_table(mod->arch.core_unw_table);
|
||||
mod->arch.core_unw_table = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -800,7 +800,6 @@ config SIBYTE_SWARM
|
||||
select SYS_SUPPORTS_HIGHMEM
|
||||
select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
select ZONE_DMA32 if 64BIT
|
||||
select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
|
||||
|
||||
config SIBYTE_LITTLESUR
|
||||
bool "Sibyte BCM91250C2-LittleSur"
|
||||
@@ -823,7 +822,6 @@ config SIBYTE_SENTOSA
|
||||
select SYS_HAS_CPU_SB1
|
||||
select SYS_SUPPORTS_BIG_ENDIAN
|
||||
select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
|
||||
|
||||
config SIBYTE_BIGSUR
|
||||
bool "Sibyte BCM91480B-BigSur"
|
||||
@@ -837,7 +835,6 @@ config SIBYTE_BIGSUR
|
||||
select SYS_SUPPORTS_HIGHMEM
|
||||
select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
select ZONE_DMA32 if 64BIT
|
||||
select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI
|
||||
|
||||
config SNI_RM
|
||||
bool "SNI RM200/300/400"
|
||||
|
||||
@@ -66,7 +66,7 @@ CONFIG_HID_MONTEREY=y
|
||||
CONFIG_EXT4_FS=y
|
||||
CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
CONFIG_EXT4_FS_SECURITY=y
|
||||
CONFIG_EXT4_ENCRYPTION=y
|
||||
CONFIG_FS_ENCRYPTION=y
|
||||
CONFIG_FANOTIFY=y
|
||||
CONFIG_FUSE_FS=y
|
||||
CONFIG_CUSE=y
|
||||
|
||||
@@ -623,7 +623,6 @@ CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_EMI62=m
|
||||
CONFIG_USB_EMI26=m
|
||||
CONFIG_USB_ADUTUX=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYPRESS_CY7C63=m
|
||||
|
||||
@@ -344,7 +344,6 @@ CONFIG_USB_SERIAL_SAFE_PADDED=y
|
||||
CONFIG_USB_SERIAL_CYBERJACK=m
|
||||
CONFIG_USB_SERIAL_XIRCOM=m
|
||||
CONFIG_USB_SERIAL_OMNINET=m
|
||||
CONFIG_USB_RIO500=m
|
||||
CONFIG_USB_LEGOTOWER=m
|
||||
CONFIG_USB_LCD=m
|
||||
CONFIG_USB_CYTHERM=m
|
||||
|
||||
@@ -25,7 +25,17 @@ extern cpumask_t cpu_sibling_map[];
|
||||
extern cpumask_t cpu_core_map[];
|
||||
extern cpumask_t cpu_foreign_map[];
|
||||
|
||||
#define raw_smp_processor_id() (current_thread_info()->cpu)
|
||||
static inline int raw_smp_processor_id(void)
|
||||
{
|
||||
#if defined(__VDSO__)
|
||||
extern int vdso_smp_processor_id(void)
|
||||
__compiletime_error("VDSO should not call smp_processor_id()");
|
||||
return vdso_smp_processor_id();
|
||||
#else
|
||||
return current_thread_info()->cpu;
|
||||
#endif
|
||||
}
|
||||
#define raw_smp_processor_id raw_smp_processor_id
|
||||
|
||||
/* Map from cpu id to sequential logical cpu number. This will only
|
||||
not be idempotent when cpus failed to come on-line. */
|
||||
|
||||
@@ -20,7 +20,6 @@ config KVM
|
||||
depends on HAVE_KVM
|
||||
select EXPORT_UASM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_MMIO
|
||||
select MMU_NOTIFIER
|
||||
|
||||
@@ -43,6 +43,10 @@ else
|
||||
$(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
|
||||
endif
|
||||
|
||||
# Some -march= flags enable MMI instructions, and GCC complains about that
|
||||
# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
|
||||
cflags-y += $(call cc-option,-mno-loongson-mmi)
|
||||
|
||||
#
|
||||
# Loongson Machines' Support
|
||||
#
|
||||
|
||||
@@ -634,7 +634,7 @@ static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
|
||||
return;
|
||||
}
|
||||
|
||||
if (cpu_has_rixi && _PAGE_NO_EXEC) {
|
||||
if (cpu_has_rixi && !!_PAGE_NO_EXEC) {
|
||||
if (fill_includes_sw_bits) {
|
||||
UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL));
|
||||
} else {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
obj-y := cfe.o
|
||||
obj-$(CONFIG_SWIOTLB) += dma.o
|
||||
obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o
|
||||
obj-$(CONFIG_SIBYTE_CFE_CONSOLE) += cfe_console.o
|
||||
obj-$(CONFIG_SIBYTE_TBPROF) += sb_tbprof.o
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0+
|
||||
/*
|
||||
* DMA support for Broadcom SiByte platforms.
|
||||
*
|
||||
* Copyright (c) 2018 Maciej W. Rozycki
|
||||
*/
|
||||
|
||||
#include <linux/swiotlb.h>
|
||||
#include <asm/bootinfo.h>
|
||||
|
||||
void __init plat_swiotlb_setup(void)
|
||||
{
|
||||
swiotlb_init(1);
|
||||
}
|
||||
@@ -7,7 +7,10 @@ ccflags-vdso := \
|
||||
$(filter -I%,$(KBUILD_CFLAGS)) \
|
||||
$(filter -E%,$(KBUILD_CFLAGS)) \
|
||||
$(filter -mmicromips,$(KBUILD_CFLAGS)) \
|
||||
$(filter -march=%,$(KBUILD_CFLAGS))
|
||||
$(filter -march=%,$(KBUILD_CFLAGS)) \
|
||||
$(filter -m%-float,$(KBUILD_CFLAGS)) \
|
||||
$(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
|
||||
-D__VDSO__
|
||||
cflags-vdso := $(ccflags-vdso) \
|
||||
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
|
||||
-O2 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
|
||||
|
||||
@@ -59,8 +59,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
|
||||
|
||||
pagefault_enable();
|
||||
|
||||
if (!ret)
|
||||
*oval = oldval;
|
||||
*oval = oldval;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -266,7 +266,7 @@ int64_t opal_xive_get_vp_info(uint64_t vp,
|
||||
int64_t opal_xive_set_vp_info(uint64_t vp,
|
||||
uint64_t flags,
|
||||
uint64_t report_cl_pair);
|
||||
int64_t opal_xive_allocate_irq(uint32_t chip_id);
|
||||
int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
|
||||
int64_t opal_xive_free_irq(uint32_t girq);
|
||||
int64_t opal_xive_sync(uint32_t type, uint32_t id);
|
||||
int64_t opal_xive_dump(uint32_t type, uint32_t id);
|
||||
|
||||
@@ -280,6 +280,7 @@ extern unsigned long __copy_tofrom_user(void __user *to,
|
||||
static inline unsigned long
|
||||
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
barrier_nospec();
|
||||
return __copy_tofrom_user(to, from, n);
|
||||
}
|
||||
#endif /* __powerpc64__ */
|
||||
|
||||
@@ -464,6 +464,10 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
|
||||
RFI_TO_USER_OR_KERNEL
|
||||
9:
|
||||
/* Deliver the machine check to host kernel in V mode. */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r10,ORIG_GPR3(r1)
|
||||
mtspr SPRN_CFAR,r10
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
||||
MACHINE_CHECK_HANDLER_WINDUP
|
||||
b machine_check_pSeries
|
||||
|
||||
|
||||
@@ -897,6 +897,7 @@ p_toc: .8byte __toc_start + 0x8000 - 0b
|
||||
/*
|
||||
* This is where the main kernel code starts.
|
||||
*/
|
||||
__REF
|
||||
start_here_multiplatform:
|
||||
/* set up the TOC */
|
||||
bl relative_toc
|
||||
@@ -972,6 +973,7 @@ start_here_multiplatform:
|
||||
RFI
|
||||
b . /* prevent speculative execution */
|
||||
|
||||
.previous
|
||||
/* This is where all platforms converge execution */
|
||||
|
||||
start_here_common:
|
||||
|
||||
@@ -475,13 +475,14 @@ void giveup_all(struct task_struct *tsk)
|
||||
if (!tsk->thread.regs)
|
||||
return;
|
||||
|
||||
check_if_tm_restore_required(tsk);
|
||||
|
||||
usermsr = tsk->thread.regs->msr;
|
||||
|
||||
if ((usermsr & msr_all_available) == 0)
|
||||
return;
|
||||
|
||||
msr_check_and_set(msr_all_available);
|
||||
check_if_tm_restore_required(tsk);
|
||||
|
||||
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
|
||||
|
||||
|
||||
@@ -874,15 +874,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
|
||||
return 0;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
struct device *dev = get_cpu_device(cpu);
|
||||
|
||||
switch (state) {
|
||||
case DOWN:
|
||||
cpuret = cpu_down(cpu);
|
||||
cpuret = device_offline(dev);
|
||||
break;
|
||||
case UP:
|
||||
cpuret = cpu_up(cpu);
|
||||
cpuret = device_online(dev);
|
||||
break;
|
||||
}
|
||||
if (cpuret) {
|
||||
if (cpuret < 0) {
|
||||
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
|
||||
__func__,
|
||||
((state == UP) ? "up" : "down"),
|
||||
@@ -971,6 +973,8 @@ int rtas_ibm_suspend_me(u64 handle)
|
||||
data.token = rtas_token("ibm,suspend-me");
|
||||
data.complete = &done;
|
||||
|
||||
lock_device_hotplug();
|
||||
|
||||
/* All present CPUs must be online */
|
||||
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
|
||||
cpuret = rtas_online_cpus_mask(offline_mask);
|
||||
@@ -1002,6 +1006,7 @@ int rtas_ibm_suspend_me(u64 handle)
|
||||
__func__);
|
||||
|
||||
out:
|
||||
unlock_device_hotplug();
|
||||
free_cpumask_var(offline_mask);
|
||||
return atomic_read(&data.error);
|
||||
}
|
||||
|
||||
@@ -20,7 +20,6 @@ if VIRTUALIZATION
|
||||
config KVM
|
||||
bool
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_EVENTFD
|
||||
select SRCU
|
||||
select KVM_VFIO
|
||||
|
||||
@@ -566,8 +566,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
|
||||
|
||||
if (kvmppc_gpa_to_ua(vcpu->kvm,
|
||||
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
|
||||
&ua, NULL))
|
||||
return H_PARAMETER;
|
||||
&ua, NULL)) {
|
||||
ret = H_PARAMETER;
|
||||
goto unlock_exit;
|
||||
}
|
||||
|
||||
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
|
||||
ret = kvmppc_tce_iommu_map(vcpu->kvm,
|
||||
|
||||
@@ -475,8 +475,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
|
||||
ua = 0;
|
||||
if (kvmppc_gpa_to_ua(vcpu->kvm,
|
||||
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
|
||||
&ua, NULL))
|
||||
return H_PARAMETER;
|
||||
&ua, NULL)) {
|
||||
ret = H_PARAMETER;
|
||||
goto unlock_exit;
|
||||
}
|
||||
|
||||
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
|
||||
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
|
||||
|
||||
@@ -1356,7 +1356,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
|
||||
*val = get_reg_val(id, vcpu->arch.pspb);
|
||||
break;
|
||||
case KVM_REG_PPC_DPDES:
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
|
||||
/*
|
||||
* On POWER9, where we are emulating msgsndp etc.,
|
||||
* we return 1 bit for each vcpu, which can come from
|
||||
* either vcore->dpdes or doorbell_request.
|
||||
* On POWER8, doorbell_request is 0.
|
||||
*/
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->dpdes |
|
||||
vcpu->arch.doorbell_request);
|
||||
break;
|
||||
case KVM_REG_PPC_VTB:
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
|
||||
|
||||
@@ -1001,20 +1001,22 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
||||
/* Mask the VP IPI */
|
||||
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues & associated interrupts */
|
||||
/* Free escalations */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
/* Free the escalation irq */
|
||||
if (xc->esc_virq[i]) {
|
||||
free_irq(xc->esc_virq[i], vcpu);
|
||||
irq_dispose_mapping(xc->esc_virq[i]);
|
||||
kfree(xc->esc_virq_names[i]);
|
||||
}
|
||||
/* Free the queue */
|
||||
}
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
xive_native_disable_queue(xc->vp_id, q, i);
|
||||
if (q->qpage) {
|
||||
free_pages((unsigned long)q->qpage,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user