Snap for 8082672 from 4a6fbaa7e6 to android13-5.15-keystone-qcom-release

Change-Id: I90693d683d91bbbd3235e0e79959c023672b9456
This commit is contained in:
Android Build Coastguard Worker
2022-01-15 01:00:37 +00:00
79 changed files with 5481 additions and 660 deletions

View File

@@ -2082,6 +2082,9 @@
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
ioremap_guard [ARM64] enable the KVM MMIO guard functionality
if available.
io7= [HW] IO7 for Marvel-based Alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
@@ -2378,7 +2381,6 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation

View File

@@ -0,0 +1,45 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/reserved-memory/google,open-dice.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Open Profile for DICE Device Tree Bindings

description: |
  This binding represents a reserved memory region containing data
  generated by the Open Profile for DICE protocol.

  See https://pigweed.googlesource.com/open-dice/

maintainers:
  - David Brazdil <dbrazdil@google.com>

allOf:
  - $ref: "reserved-memory.yaml"

properties:
  compatible:
    const: google,open-dice

  reg:
    description: page-aligned region of memory containing DICE data

required:
  - compatible
  - reg

unevaluatedProperties: false

examples:
  - |
    reserved-memory {
        #address-cells = <2>;
        #size-cells = <1>;

        dice: dice@12340000 {
            compatible = "google,open-dice";
            reg = <0x00 0x12340000 0x2000>;
            no-map;
        };
    };

View File

@@ -11,3 +11,4 @@ ARM
psci
pvtime
ptp_kvm
mmio-guard

View File

@@ -0,0 +1,74 @@
.. SPDX-License-Identifier: GPL-2.0
==============
KVM MMIO guard
==============
KVM implements device emulation by handling translation faults to any
IPA range that is not contained in a memory slot. Such a translation
fault is in most cases passed on to userspace (or in rare cases to the
host kernel) with the address, size and possibly data of the access
for emulation.
Should the guest exit with an address that is not one that corresponds
to an emulatable device, userspace may take measures that are not the
most graceful as far as the guest is concerned (such as terminating it
or delivering a fatal exception).
There is also an element of trust: by forwarding the request to
userspace, the kernel assumes that the guest trusts userspace to do
the right thing.
The KVM MMIO guard offers a way to mitigate this last point: a guest
can request that only certain regions of the IPA space are valid as
MMIO. Only these regions will be handled as an MMIO, and any other
will result in an exception being delivered to the guest.
This relies on a set of hypercalls defined in the KVM-specific range,
using the HVC64 calling convention.
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO
============== ======== ================================
Function ID: (uint32) 0xC6000002
Arguments: none
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
(uint64) Protection Granule (PG) size in
bytes (r0)
============== ======== ================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL
============== ======== ==============================
Function ID: (uint32) 0xC6000003
Arguments: none
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ==============================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP
============== ======== ====================================
Function ID: (uint32) 0xC6000004
Arguments: (uint64) The base of the PG-sized IPA range
that is allowed to be accessed as
MMIO. Must be aligned to the PG size
(r1)
(uint64) Index in the MAIR_EL1 register
providing the memory attribute that
is used by the guest (r2)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ====================================
* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP
============== ======== ======================================
Function ID: (uint32) 0xC6000005
Arguments: (uint64) The base of the PG-sized IPA range that
was previously mapped with the MAP
hypercall. Must be aligned to the PG
size (r1)
Return Values: (int64) NOT_SUPPORTED(-1) on error, or
RET_SUCCESS(0) (r0)
============== ======== ======================================

View File

@@ -6,5 +6,6 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
void kvm_arm_init_hyp_services(void);
#endif

View File

@@ -25,9 +25,11 @@ config ARM64
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_IOREMAP_PHYS_HOOKS
select ARCH_HAS_KCOV
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL

View File

@@ -105,6 +105,8 @@ void __init early_fixmap_init(void);
extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
extern pte_t *__get_fixmap_pte(enum fixed_addresses idx);
#include <asm-generic/fixmap.h>
#endif /* !__ASSEMBLY__ */

View File

@@ -6,5 +6,8 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
void kvm_arm_init_hyp_services(void);
void kvm_init_memshare_services(void);
void kvm_init_ioremap_services(void);
#endif

View File

@@ -134,7 +134,7 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
* The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
@@ -342,6 +342,8 @@
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
#define FAR_MASK GENMASK_ULL(11, 0)
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \
@@ -359,4 +361,13 @@
#define CPACR_EL1_TTA (1 << 28)
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
/*
* ARMv8 Reset Values
*/
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
#endif /* __ARM64_KVM_ARM_H__ */

View File

@@ -64,6 +64,8 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_reclaim_page,
__KVM_HOST_SMCCC_FUNC___pkvm_host_donate_guest,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -71,11 +73,13 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
__KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_init_shadow,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_shadow,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -214,8 +218,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
extern u64 __kvm_get_mdcr_el2(void);

View File

@@ -41,6 +41,22 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
/*
 * Enable pointer authentication for this vcpu.
 *
 * Userspace must request the address and generic flavours together,
 * and the system must implement both; otherwise the request is
 * rejected with -EINVAL. On success the GUEST_HAS_PTRAUTH flag is set
 * and 0 is returned.
 */
static inline int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
	bool has_address = test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS,
				    vcpu->arch.features);
	bool has_generic = test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC,
				    vcpu->arch.features);

	/* Both flavours must be requested and backed by the hardware. */
	if (!has_address || !has_generic || !system_has_full_ptr_auth())
		return -EINVAL;

	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;

	return 0;
}
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
@@ -474,4 +490,34 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
return test_bit(feature, vcpu->arch.features);
}
/* Narrow the PSCI register arguments (r1 to r3) to 32 bits. */
static inline void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
{
int i;
/*
* Zero the input registers' upper 32 bits. They will be fully
* zeroed on exit, so we're fine changing them in place.
*/
for (i = 1; i < 4; i++)
vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i)));
}
/* An affinity value is valid iff no bits outside the MPIDR fields are set. */
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
					   unsigned long affinity)
{
	return (affinity & ~MPIDR_HWID_BITMASK) == 0;
}
#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)

/*
 * Build the mask covering the MPIDR affinity fields at @affinity_level
 * and above. Levels beyond 3 do not exist, so an empty mask is returned
 * for them.
 */
static inline unsigned long psci_affinity_mask(unsigned long affinity_level)
{
	if (affinity_level > 3)
		return 0;

	return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
}
#endif /* __ARM64_KVM_EMULATE_H__ */

View File

@@ -70,6 +70,63 @@ u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
/*
 * A stack of free pages tracked by physical address, shareable between
 * the host and the hypervisor. The first word of each page stores the
 * physical address of the next page in the stack (see push_hyp_memcache).
 */
struct kvm_hyp_memcache {
	phys_addr_t head;	/* PA of the top page; meaningless when nr_pages == 0 */
	unsigned long nr_pages;	/* number of pages currently in the cache */
};
/*
 * Push the page at @p onto the memcache. The previous head's physical
 * address is stored in the page's first word, and @to_pa converts the
 * page's virtual address into the physical address kept in @mc->head.
 */
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
				     phys_addr_t *p,
				     phys_addr_t (*to_pa)(void *virt))
{
	phys_addr_t prev_head = mc->head;

	*p = prev_head;
	mc->head = to_pa(p);
	mc->nr_pages++;
}
/*
 * Pop the most recently pushed page from the memcache.
 *
 * @mc:    the memcache to pop from.
 * @to_va: converts the physical address stored in @mc->head into a
 *         dereferenceable virtual address.
 *
 * Returns the virtual address of the page, or NULL if the cache is empty.
 */
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
				     void *(*to_va)(phys_addr_t phys))
{
	phys_addr_t *p;

	/*
	 * Check for emptiness before translating @mc->head: when the cache
	 * is empty, head holds no valid physical address and must not be
	 * fed to @to_va.
	 */
	if (!mc->nr_pages)
		return NULL;

	p = to_va(mc->head);
	mc->head = *p;
	mc->nr_pages--;

	return p;
}
/*
 * Refill @mc until it holds at least @min_pages pages, allocating each
 * page with @alloc_fn(@arg) and recording its physical address via
 * @to_pa.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails (pages pushed
 * so far remain in the cache).
 */
static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
				       unsigned long min_pages,
				       void *(*alloc_fn)(void *arg),
				       phys_addr_t (*to_pa)(void *virt),
				       void *arg)
{
	while (mc->nr_pages < min_pages) {
		void *page = alloc_fn(arg);

		if (!page)
			return -ENOMEM;
		push_hyp_memcache(mc, page, to_pa);
	}

	return 0;
}
/*
 * Drain @mc, handing every page back through @free_fn(@page, @arg).
 * @to_va is used to turn each stored physical address into a virtual
 * address before it is freed.
 */
static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
				       void (*free_fn)(void *virt, void *arg),
				       void *(*to_va)(phys_addr_t phys),
				       void *arg)
{
	while (mc->nr_pages) {
		void *page = pop_hyp_memcache(mc, to_va);

		free_fn(page, arg);
	}
}
void free_hyp_memcache(struct kvm_hyp_memcache *mc);
int topup_hyp_memcache(struct kvm_vcpu *vcpu);
struct kvm_vmid {
/* The VMID generation used for the virt. memory system */
u64 vmid_gen;
@@ -101,6 +158,20 @@ struct kvm_s2_mmu {
struct kvm_arch_memory_slot {
};
/*
 * A host page pinned on behalf of a protected guest.
 * NOTE(review): presumably linked into kvm_protected_vm::pinned_pages —
 * confirm against the list users.
 */
struct kvm_pinned_page {
	struct list_head link;	/* list linkage */
	struct page *page;	/* the pinned host page */
};
/* Per-VM state used when the VM runs as a protected (pKVM) guest. */
struct kvm_protected_vm {
	/* True if this VM is a protected VM (see kvm_vm_is_protected()). */
	bool enabled;
	/* Handle identifying this VM's shadow state at EL2. */
	int shadow_handle;
	/* NOTE(review): presumably serialises shadow setup/teardown — confirm. */
	struct mutex shadow_lock;
	/* Pages reclaimed from the hypervisor when the VM is torn down. */
	struct kvm_hyp_memcache teardown_mc;
	/* List of struct kvm_pinned_page pinned for this VM. */
	struct list_head pinned_pages;
	/* IPA at which the pvmfw firmware is loaded. */
	gpa_t pvmfw_load_addr;
};
struct kvm_arch {
struct kvm_s2_mmu mmu;
@@ -122,7 +193,12 @@ struct kvm_arch {
* should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
* supported.
*/
bool return_nisv_io_abort_to_user;
#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
/* Memory Tagging Extension enabled for the guest */
#define KVM_ARCH_FLAG_MTE_ENABLED 1
/* Guest has bought into the MMIO guard extension */
#define KVM_ARCH_FLAG_MMIO_GUARD 2
unsigned long flags;
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
@@ -134,8 +210,35 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
/* Memory Tagging Extension enabled for the guest */
bool mte_enabled;
struct kvm_protected_vm pkvm;
u64 hypercall_exit_enabled;
};
/* Per-vcpu state for vcpus that belong to a protected VM. */
struct kvm_protected_vcpu {
	/* A unique id to the shadow structs in the hyp shadow area. */
	int shadow_handle;

	/* A pointer to the host's vcpu. */
	struct kvm_vcpu *host_vcpu;

	/* A pointer to the shadow vm. */
	struct kvm_shadow_vm *shadow_vm;

	/* Tracks exit code for the protected guest. */
	int exit_code;

	/*
	 * Track the power state transition of a protected vcpu.
	 * Can be in one of three states:
	 * PSCI_0_2_AFFINITY_LEVEL_ON
	 * PSCI_0_2_AFFINITY_LEVEL_OFF
	 * PSCI_0_2_AFFINITY_LEVEL_PENDING
	 */
	int power_state;

	/* True if this vcpu is currently loaded on a cpu. */
	bool loaded_on_cpu;
};
struct kvm_vcpu_fault_info {
@@ -367,8 +470,12 @@ struct kvm_vcpu_arch {
/* Don't run the guest (internal implementation need) */
bool pause;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
union {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
/* Pages to be donated to pkvm/EL2 if it runs out */
struct kvm_hyp_memcache pkvm_memcache;
};
/* Target CPU and feature flags */
int target;
@@ -389,6 +496,8 @@ struct kvm_vcpu_arch {
u64 last_steal;
gpa_t base;
} steal;
struct kvm_protected_vcpu pkvm;
};
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -446,6 +555,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_PKVM_STATE_DIRTY (1 << 15)
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
@@ -479,9 +589,6 @@ struct kvm_vcpu_arch {
#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{
/*
@@ -573,6 +680,29 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
return true;
}
/*
 * Read a guest system register, preferring the live hardware copy when
 * the vcpu's sysregs are currently loaded on the CPU (VHE hyp code
 * only); otherwise read the in-memory context.
 */
static inline u64 vcpu_arch_read_sys_reg(const struct kvm_vcpu_arch *vcpu_arch, int reg)
{
	/* Poison pattern, visible only if the CPU-read path misbehaves. */
	u64 val = 0x8badf00d8badf00d;

	if (is_vhe_hyp_code() && vcpu_arch->sysregs_loaded_on_cpu) {
		if (__vcpu_read_sys_reg_from_cpu(reg, &val))
			return val;
	}

	return ctxt_sys_reg(&vcpu_arch->ctxt, reg);
}
/*
 * Write a guest system register, updating the live hardware copy when
 * the vcpu's sysregs are currently loaded on the CPU (VHE hyp code
 * only); otherwise write the in-memory context.
 */
static inline void vcpu_arch_write_sys_reg(struct kvm_vcpu_arch *vcpu_arch, u64 val, int reg)
{
	if (is_vhe_hyp_code() && vcpu_arch->sysregs_loaded_on_cpu) {
		if (__vcpu_write_sys_reg_to_cpu(val, reg))
			return;
	}

	ctxt_sys_reg(&vcpu_arch->ctxt, reg) = val;
}
#define vcpu_read_sys_reg(vcpu, reg) vcpu_arch_read_sys_reg(&((vcpu)->arch), reg)
#define vcpu_write_sys_reg(vcpu, val, reg) vcpu_arch_write_sys_reg(&((vcpu)->arch), val, reg)
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
};
@@ -777,12 +907,7 @@ int kvm_set_ipa_limit(void);
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;
}
#define kvm_vm_is_protected(kvm) ((kvm)->arch.pkvm.enabled)
void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
@@ -792,7 +917,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))

View File

@@ -61,8 +61,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
@@ -123,18 +123,6 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
struct kvm_iommu_ops {
int (*init)(void);
bool (*host_smc_handler)(struct kvm_cpu_context *host_ctxt);
bool (*host_mmio_dabt_handler)(struct kvm_cpu_context *host_ctxt,
phys_addr_t fault_pa, unsigned int len,
bool is_write, int rd);
void (*host_stage2_set_owner)(phys_addr_t addr, size_t size, u8 owner_id);
int (*host_stage2_adjust_mmio_range)(phys_addr_t addr, phys_addr_t *start,
phys_addr_t *end);
};
extern struct kvm_iommu_ops kvm_iommu_ops;
extern const struct kvm_iommu_ops kvm_s2mpu_ops;
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern bool kvm_nvhe_sym(smccc_trng_available);
#endif /* __ARM64_KVM_HYP_H__ */

View File

@@ -161,7 +161,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);

View File

@@ -288,6 +288,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
*/
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/*
* kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
* @vtcr: Content of the VTCR register.
*
* Return: the size (in bytes) of the stage-2 PGD
*/
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -349,14 +357,16 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc);
/**
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
* track ownership.
* kvm_pgtable_stage2_annotate() - Unmap and annotate pages in the IPA space
* to track ownership (and more).
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Base intermediate physical address to annotate.
* @size: Size of the annotated range.
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
* page-table pages.
* @owner_id: Unique identifier for the owner of the page.
* @annotation: A 63 bit value that will be stored in the page tables.
* @annotation[0] must be 0, and @annotation[63:1] is stored
* in the page tables.
*
* By default, all page-tables are owned by identifier 0. This function can be
* used to mark portions of the IPA space as owned by other entities. When a
@@ -365,8 +375,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, u8 owner_id);
int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, kvm_pte_t annotation);
/**
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.

View File

@@ -2,18 +2,272 @@
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
#include <linux/memblock.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>
/* Maximum number of protected VMs that can be created. */
#define KVM_MAX_PVMS 255
#define HYP_MEMBLOCK_REGIONS 128
#define PVMFW_INVALID_LOAD_ADDR (-1)
int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap);
int kvm_init_pvm(struct kvm *kvm, unsigned long type);
int create_el2_shadow(struct kvm *kvm);
/*
* Definitions for features to be allowed or restricted for guest virtual
* machines, depending on the mode KVM is running in and on the type of guest
* that is running.
*
* The ALLOW masks represent a bitmask of feature fields that are allowed
* without any restrictions as long as they are supported by the system.
*
* The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
* features that are restricted to support at most the specified feature.
*
* If a feature field is not present in either, then it is not supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Cannot be trapped or prevent the guest from using anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
)
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints, and watchpoints for protected
* VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
)
/*
 * Returns the maximum number of breakpoints supported for protected VMs.
 */
static inline int pkvm_get_max_brps(void)
{
	int field = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS),
			      PVM_ID_AA64DFR0_ALLOW);

	/*
	 * When breakpoints are supported the architecture encodes the
	 * count as (field + 1). A zero field is architecturally reserved
	 * and is used here to mean no debug support at all.
	 */
	if (!field)
		return 0;

	return field + 1;
}
/*
 * Returns the maximum number of watchpoints supported for protected VMs.
 */
static inline int pkvm_get_max_wrps(void)
{
	int field = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS),
			      PVM_ID_AA64DFR0_ALLOW);

	/* Same (field + 1) encoding as pkvm_get_max_brps(); 0 = no debug. */
	if (!field)
		return 0;

	return field + 1;
}
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
extern phys_addr_t kvm_nvhe_sym(pvmfw_size);
/*
 * Number of bytes of hyp vmemmap needed to cover @reg, with both ends
 * of the vmemmap span rounded out to a page boundary.
 */
static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
	unsigned long first_pfn = reg->base >> PAGE_SHIFT;
	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
	unsigned long start = first_pfn * vmemmap_entry_size;
	unsigned long end = start + nr_pages * vmemmap_entry_size;

	return ALIGN(end, PAGE_SIZE) - ALIGN_DOWN(start, PAGE_SIZE);
}
/*
 * Total number of pages needed for the hyp vmemmap, summed over all
 * registered hyp memory regions.
 */
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
	unsigned long i, total_bytes = 0;

	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++)
		total_bytes += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
							 vmemmap_entry_size);

	return total_bytes >> PAGE_SHIFT;
}
/* Pages needed for the shadow-VM table (one entry per possible pVM). */
static inline unsigned long hyp_shadow_table_pages(size_t shadow_entry_size)
{
	size_t table_size = KVM_MAX_PVMS * shadow_entry_size;

	return PAGE_ALIGN(table_size) >> PAGE_SHIFT;
}
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0, i;

View File

@@ -24,6 +24,7 @@
#define REG_NS_INTERRUPT_ENABLE_PER_VID_SET 0x20
#define REG_NS_INTERRUPT_CLEAR 0x2c
#define REG_NS_VERSION 0x60
#define REG_NS_STATUS 0x68
#define REG_NS_NUM_CONTEXT 0x100
#define REG_NS_CONTEXT_CFG_VALID_VID 0x104
#define REG_NS_ALL_INVALIDATION 0x1000
@@ -67,6 +68,9 @@
VERSION_MINOR_ARCH_VER_MASK | \
VERSION_REV_ARCH_VER_MASK)
#define STATUS_BUSY BIT(0)
#define STATUS_ON_INVALIDATING BIT(1)
#define NUM_CONTEXT_MASK GENMASK(3, 0)
#define CONTEXT_CFG_VALID_VID_CTX_VALID(ctx) BIT((4 * (ctx)) + 3)
@@ -268,7 +272,7 @@ static inline bool __is_smpt_uniform(u32 *smpt, enum mpt_prot prot)
return true;
}
/**
/*
* Set protection bits of FMPT/SMPT in a given range.
* Returns flags specifying whether L1/L2 changes need to be made visible
* to the device.

View File

@@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_MEM_ENCRYPT_H
#define __ASM_MEM_ENCRYPT_H
/* Returns true if memory encryption is active on this system. */
bool mem_encrypt_active(void);
/*
 * Mark @numpages pages starting at virtual address @addr as encrypted
 * or decrypted. NOTE(review): presumably returns 0 on success and a
 * negative errno on failure — confirm against the arch implementation.
 */
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
#endif /* __ASM_MEM_ENCRYPT_H */

View File

@@ -111,6 +111,9 @@ static __always_inline bool has_vhe(void)
/*
* Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps.
* This allows the compiler to discard VHE-specific code from the
* nVHE object, reducing the number of external symbol references
* needed to link.
*/
if (is_vhe_hyp_code())
return true;

View File

@@ -413,6 +413,15 @@ struct kvm_arm_copy_mte_tags {
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
/* Protected KVM */
#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA 0
#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO 1
struct kvm_protected_vm_info {
__u64 firmware_size;
__u64 __reserved[7];
};
#endif
#endif /* __ARM_KVM_H__ */

View File

@@ -1892,15 +1892,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
if (kvm_get_mode() != KVM_MODE_PROTECTED)
return false;
if (is_kernel_in_hyp_mode()) {
pr_warn("Protected KVM not available with VHE\n");
return false;
}
return true;
return kvm_get_mode() == KVM_MODE_PROTECTED;
}
#endif /* CONFIG_KVM */

View File

@@ -76,9 +76,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
/* Vectors installed by hyp-init on reset HVC. */
KVM_NVHE_ALIAS(__hyp_stub_vectors);
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
/* Kernel symbols needed for cpus_have_final/const_caps checks. */
KVM_NVHE_ALIAS(arm64_const_caps_ready);
KVM_NVHE_ALIAS(cpu_hwcap_keys);

View File

@@ -40,6 +40,7 @@
#include <asm/elf.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/hypervisor.h>
#include <asm/kasan.h>
#include <asm/numa.h>
#include <asm/sections.h>
@@ -49,6 +50,7 @@
#include <asm/tlbflush.h>
#include <asm/traps.h>
#include <asm/efi.h>
#include <asm/hypervisor.h>
#include <asm/xen/hypervisor.h>
#include <asm/mmu_context.h>
@@ -445,3 +447,9 @@ static int __init register_arm64_panic_block(void)
return 0;
}
device_initcall(register_arm64_panic_block);
/*
 * arm64 hook registering the KVM hypervisor services available to this
 * guest: the ioremap (MMIO guard) and memory-sharing services.
 * NOTE(review): appears to be the arch callback invoked from the generic
 * hyp-services discovery — confirm against drivers/firmware/smccc.
 */
void kvm_arm_init_hyp_services(void)
{
kvm_init_ioremap_services();
kvm_init_memshare_services();
}

View File

@@ -32,7 +32,7 @@ define rule_gen_hyp_constants
$(call filechk,offsets,__HYP_CONSTANTS_H__)
endef
CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include -D__KVM_NVHE_HYPERVISOR__=1
$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
$(call if_changed_dep,cc_s_c)

View File

@@ -88,7 +88,9 @@ static u64 timer_get_offset(struct arch_timer_context *ctxt)
switch(arch_timer_ctx_index(ctxt)) {
case TIMER_VTIMER:
return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
if (likely(!kvm_vm_is_protected(vcpu->kvm)))
return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
fallthrough;
default:
return 0;
}
@@ -754,6 +756,9 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
if (unlikely(kvm_vm_is_protected(vcpu->kvm)))
cntvoff = 0;
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, tmp, kvm)
timer_set_offset(vcpu_vtimer(tmp), cntvoff);

View File

@@ -37,6 +37,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>
@@ -63,6 +64,10 @@ static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
/* KVM "vendor" hypercalls which may be forwarded to userspace on request. */
#define KVM_EXIT_HYPERCALL_VALID_MASK (BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE) | \
BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE))
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -83,13 +88,21 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
{
int r;
if (cap->flags)
return -EINVAL;
/* Capabilities with flags */
switch (cap->cap) {
case KVM_CAP_ARM_PROTECTED_VM:
return kvm_arm_vm_ioctl_pkvm(kvm, cap);
default:
if (cap->flags)
return -EINVAL;
}
/* Capabilities without flags */
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&kvm->arch.flags);
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
@@ -97,10 +110,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = -EINVAL;
} else {
r = 0;
kvm->arch.mte_enabled = true;
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
mutex_unlock(&kvm->lock);
break;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK)
return -EINVAL;
if (cap->args[1] || cap->args[2] || cap->args[3])
return -EINVAL;
WRITE_ONCE(kvm->arch.hypercall_exit_enabled, cap->args[0]);
r = 0;
break;
default:
r = -EINVAL;
break;
@@ -138,17 +161,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
if (ret)
return ret;
if (type & ~KVM_VM_TYPE_MASK)
return -EINVAL;
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
goto out_free_stage2_pgd;
return ret;
ret = kvm_init_pvm(kvm, type);
if (ret)
return ret;
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type);
if (ret)
return ret;
kvm_vgic_early_init(kvm);
@@ -157,9 +183,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
set_default_spectre(kvm);
return ret;
out_free_stage2_pgd:
kvm_free_stage2_pgd(&kvm->arch.mmu);
return ret;
}
@@ -168,6 +191,30 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
void free_hyp_memcache(struct kvm_hyp_memcache *mc);
static void kvm_shadow_destroy(struct kvm *kvm)
{
struct kvm_pinned_page *ppage, *tmp;
struct mm_struct *mm = current->mm;
struct list_head *ppages;
if (kvm->arch.pkvm.shadow_handle)
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_shadow, kvm));
free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
ppages = &kvm->arch.pkvm.pinned_pages;
list_for_each_entry_safe(ppage, tmp, ppages, link) {
WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page,
page_to_pfn(ppage->page)));
cond_resched();
account_locked_vm(mm, 1, false);
unpin_user_pages_dirty_lock(&ppage->page, 1, true);
list_del(&ppage->link);
kfree(ppage);
}
}
/**
* kvm_arch_destroy_vm - destroy the VM data structure
@@ -180,6 +227,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
bitmap_free(kvm->arch.pmu_filter);
kvm_vgic_destroy(kvm);
kvm_shadow_destroy(kvm);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
@@ -192,9 +240,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_unshare_hyp(kvm, kvm + 1);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
static int kvm_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
@@ -285,6 +334,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_PTRAUTH_GENERIC:
r = system_has_full_ptr_auth();
break;
case KVM_CAP_EXIT_HYPERCALL:
r = KVM_EXIT_HYPERCALL_VALID_MASK;
break;
default:
r = 0;
}
@@ -292,6 +344,75 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
/*
* Checks whether the exctension specified in ext is supported for protected
* vms. The capabilities supported by kvm in general are passed in kvm_cap.
*/
static int pkvm_check_extension(struct kvm *kvm, long ext, int kvm_cap)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
case KVM_CAP_MSI_DEVID:
case KVM_CAP_ARM_VM_IPA_SIZE:
case KVM_CAP_EXIT_HYPERCALL:
r = kvm_cap;
break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = min(kvm_cap, pkvm_get_max_brps());
break;
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = min(kvm_cap, pkvm_get_max_wrps());
break;
case KVM_CAP_ARM_PMU_V3:
r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER),
PVM_ID_AA64DFR0_ALLOW);
break;
case KVM_CAP_ARM_SVE:
r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE),
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
r = kvm_cap &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_API),
PVM_ID_AA64ISAR1_ALLOW) &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA),
PVM_ID_AA64ISAR1_ALLOW);
break;
case KVM_CAP_ARM_PTRAUTH_GENERIC:
r = kvm_cap &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI),
PVM_ID_AA64ISAR1_ALLOW) &&
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA),
PVM_ID_AA64ISAR1_ALLOW);
break;
case KVM_CAP_ARM_PROTECTED_VM:
r = 1;
break;
default:
r = 0;
break;
}
return r;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r = kvm_check_extension(kvm, ext);
if (unlikely(kvm && kvm_vm_is_protected(kvm)))
r = pkvm_check_extension(kvm, ext, r);
return r;
}
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -364,7 +485,10 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
if (is_protected_kvm_enabled())
free_hyp_memcache(&vcpu->arch.pkvm_memcache);
else
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@@ -389,15 +513,14 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
vgic_v4_put(vcpu, true);
kvm_vgic_put(vcpu, true);
preempt_enable();
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
preempt_disable();
vgic_v4_load(vcpu);
kvm_vgic_load(vcpu);
preempt_enable();
}
@@ -406,6 +529,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct kvm_s2_mmu *mmu;
int *last_ran;
if (is_protected_kvm_enabled())
goto nommu;
mmu = vcpu->arch.hw_mmu;
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
@@ -423,6 +549,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
*last_ran = vcpu->vcpu_id;
}
nommu:
vcpu->cpu = cpu;
kvm_vgic_load(vcpu);
@@ -442,16 +569,32 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu_has_ptrauth(vcpu))
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (is_protected_kvm_enabled()) {
kvm_call_hyp_nvhe(__pkvm_vcpu_load, vcpu);
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
}
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (is_protected_kvm_enabled()) {
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp_nvhe(__pkvm_vcpu_put, vcpu);
/* __pkvm_vcpu_put implies a sync of the state */
if (!kvm_vm_is_protected(vcpu->kvm))
vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
}
kvm_arch_vcpu_put_debug_state_flags(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
kvm_vcpu_put_sysregs_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vgic_put(vcpu, false);
kvm_vcpu_pmu_restore_host(vcpu);
vcpu->cpu = -1;
@@ -650,13 +793,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
static_branch_inc(&userspace_irqchip_in_use);
}
/*
* Initialize traps for protected VMs.
* NOTE: Move to run in EL2 directly, rather than via a hypercall, once
* the code is in place for first run initialization at EL2.
*/
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
if (is_protected_kvm_enabled()) {
/* Start with the vcpu in a dirty state */
if (!kvm_vm_is_protected(vcpu->kvm))
vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
ret = create_el2_shadow(kvm);
}
return ret;
}
@@ -807,6 +949,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = kvm_handle_mmio_return(vcpu);
if (ret)
return ret;
} else if (run->exit_reason == KVM_EXIT_HYPERCALL) {
smccc_set_retval(vcpu,
vcpu->run->hypercall.ret,
vcpu->run->hypercall.args[0],
vcpu->run->hypercall.args[1],
vcpu->run->hypercall.args[2]);
}
vcpu_load(vcpu);
@@ -1859,6 +2007,8 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(smccc_trng_available) = smccc_trng_available;
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
if (ret)
@@ -2205,7 +2355,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
kvm_mode = KVM_MODE_PROTECTED;
if (!is_kernel_in_hyp_mode())
kvm_mode = KVM_MODE_PROTECTED;
else
pr_warn_once("Protected KVM not available with VHE\n");
return 0;
}

View File

@@ -201,6 +201,21 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
/*
* If we run a non-protected VM when protection is enabled
* system-wide, resync the state from the hypervisor and mark
* it as dirty on the host side if it wasn't dirty already
* (which could happen if preemption has taken place).
*/
if (is_protected_kvm_enabled() && !kvm_vm_is_protected(vcpu->kvm)) {
preempt_disable();
if (!(vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY)) {
kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state, vcpu);
vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
}
preempt_enable();
}
/*
* See ARM ARM B1.14.1: "Hyp traps on instructions
* that fail their condition code check"
@@ -260,6 +275,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
/*
* We just exited, so the state is clean from a hypervisor
* perspective.
*/
if (is_protected_kvm_enabled())
vcpu->arch.flags &= ~KVM_ARM64_PKVM_STATE_DIRTY;
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
u64 disr = kvm_vcpu_get_disr(vcpu);

View File

@@ -2,9 +2,12 @@
#include <linux/kbuild.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
int main(void)
{
DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
DEFINE(KVM_SHADOW_VM_SIZE, sizeof(struct kvm_shadow_vm));
DEFINE(SHADOW_VCPU_STATE_SIZE, sizeof(struct shadow_vcpu_state));
return 0;
}

View File

@@ -1,200 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_FIXED_CONFIG_H__
#define __ARM64_KVM_FIXED_CONFIG_H__
#include <asm/sysreg.h>
/*
* This file contains definitions for features to be allowed or restricted for
* guest virtual machines, depending on the mode KVM is running in and on the
* type of guest that is running.
*
* The ALLOW masks represent a bitmask of feature fields that are allowed
* without any restrictions as long as they are supported by the system.
*
* The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
* features that are restricted to support at most the specified feature.
*
* If a feature field is not present in either, than it is not supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Cannot be trapped or prevent the guest from using anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
)
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
)
/*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints, and watchpoints for protected
* VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
)
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
int kvm_check_pvm_sysreg_table(void);
#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */

View File

@@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ARM64_KVM_NVHE_IOMMU_H__
#define __ARM64_KVM_NVHE_IOMMU_H__
#include <linux/types.h>
#include <asm/kvm_host.h>
#include <nvhe/mem_protect.h>
struct kvm_iommu_ops {
int (*init)(void);
bool (*host_smc_handler)(struct kvm_cpu_context *host_ctxt);
bool (*host_mmio_dabt_handler)(struct kvm_cpu_context *host_ctxt,
phys_addr_t fault_pa, unsigned int len,
bool is_write, int rd);
void (*host_stage2_set_owner)(phys_addr_t addr, size_t size, pkvm_id owner_id);
int (*host_stage2_adjust_mmio_range)(phys_addr_t addr, phys_addr_t *start,
phys_addr_t *end);
};
extern struct kvm_iommu_ops kvm_iommu_ops;
extern const struct kvm_iommu_ops kvm_s2mpu_ops;
#endif /* __ARM64_KVM_NVHE_IOMMU_H__ */

View File

@@ -51,18 +51,40 @@ struct host_kvm {
};
extern struct host_kvm host_kvm;
extern const u8 pkvm_hyp_id;
typedef u32 pkvm_id;
static const pkvm_id pkvm_host_id = 0;
static const pkvm_id pkvm_hyp_id = (1 << 16);
static const pkvm_id pkvm_host_poison = pkvm_hyp_id + 1;
extern unsigned long hyp_nr_cpus;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
int __pkvm_host_unshare_hyp(u64 pfn);
int __pkvm_host_reclaim_page(u64 pfn);
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu);
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu);
int __pkvm_guest_share_host(struct kvm_vcpu *vcpu, u64 ipa);
int __pkvm_guest_unshare_host(struct kvm_vcpu *vcpu, u64 ipa);
int __pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa);
int __pkvm_remove_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa);
bool __pkvm_check_ioguard_page(struct kvm_vcpu *vcpu);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, pkvm_id owner_id);
int kvm_host_prepare_stage2(void *pgt_pool_base);
int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
int hyp_pin_shared_mem(void *from, void *to);
void hyp_unpin_shared_mem(void *from, void *to);
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc);
void reclaim_guest_pages(struct kvm_shadow_vm *vm, struct kvm_hyp_memcache *mc);
static __always_inline void __load_host_stage2(void)
{
if (static_branch_likely(&kvm_protected_mode_initialized))

View File

@@ -45,4 +45,27 @@ static inline int hyp_page_count(void *addr)
return p->refcount;
}
static inline void hyp_page_ref_inc(struct hyp_page *p)
{
BUG_ON(p->refcount == USHRT_MAX);
p->refcount++;
}
static inline void hyp_page_ref_dec(struct hyp_page *p)
{
BUG_ON(!p->refcount);
p->refcount--;
}
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
hyp_page_ref_dec(p);
return (p->refcount == 0);
}
static inline void hyp_set_page_refcounted(struct hyp_page *p)
{
BUG_ON(p->refcount);
p->refcount = 1;
}
#endif /* __KVM_HYP_MEMORY_H */

View File

@@ -13,25 +13,17 @@
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
int hyp_create_pcpu_fixmap(void);
void *hyp_fixmap_map(phys_addr_t phys);
int hyp_fixmap_unmap(void);
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int hyp_back_vmemmap(phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot);
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
unsigned long *start, unsigned long *end)
{
unsigned long nr_pages = size >> PAGE_SHIFT;
struct hyp_page *p = hyp_phys_to_page(phys);
*start = (unsigned long)p;
*end = *start + nr_pages * sizeof(struct hyp_page);
*start = ALIGN_DOWN(*start, PAGE_SIZE);
*end = ALIGN(*end, PAGE_SIZE);
}
#endif /* __KVM_HYP_MM_H */

View File

@@ -0,0 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2021 Google LLC
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_NVHE_PKVM_H__
#define __ARM64_KVM_NVHE_PKVM_H__
#include <asm/kvm_pkvm.h>
#include <nvhe/gfp.h>
#include <nvhe/spinlock.h>
/*
* A container for the vcpu state that hyp needs to maintain for protected VMs.
*/
struct shadow_vcpu_state {
struct kvm_shadow_vm *vm;
struct kvm_vcpu vcpu;
};
/*
* Holds the relevant data for running a protected vm.
*/
struct kvm_shadow_vm {
/* A unique id to the shadow structs in the hyp shadow area. */
int shadow_handle;
/* Number of vcpus for the vm. */
int created_vcpus;
/* Pointers to the shadow vcpus of the shadow vm. */
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
/* Primary vCPU pending entry to the pvmfw */
struct kvm_vcpu *pvmfw_entry_vcpu;
/* The host's kvm structure. */
struct kvm *host_kvm;
/* The total size of the donated shadow area. */
size_t shadow_area_size;
struct kvm_arch arch;
struct kvm_pgtable pgt;
struct kvm_pgtable_mm_ops mm_ops;
struct hyp_pool pool;
hyp_spinlock_t lock;
/* Array of the shadow state per vcpu. */
struct shadow_vcpu_state shadow_vcpus[0];
};
static inline bool vcpu_is_protected(struct kvm_vcpu *vcpu)
{
if (!is_protected_kvm_enabled())
return false;
return vcpu->arch.pkvm.shadow_vm->arch.pkvm.enabled;
}
extern struct kvm_shadow_vm **shadow_table;
extern phys_addr_t pvmfw_base;
extern phys_addr_t pvmfw_size;
int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size, void *pgd);
int __pkvm_teardown_shadow(struct kvm *kvm);
struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, int vcpu_idx);
void put_shadow_vcpu(struct kvm_vcpu *vcpu);
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
void kvm_reset_pvm_sys_regs(struct kvm_vcpu *vcpu);
int kvm_check_pvm_sysreg_table(void);
void pkvm_reset_vcpu(struct kvm_vcpu *vcpu);
bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr);
static inline bool pvm_has_pvmfw(struct kvm_shadow_vm *vm)
{
return vm->arch.pkvm.pvmfw_load_addr != PVMFW_INVALID_LOAD_ADDR;
}
static inline bool ipa_in_pvmfw_region(struct kvm_shadow_vm *vm, u64 ipa)
{
struct kvm_protected_vm *pkvm = &vm->arch.pkvm;
if (!pvm_has_pvmfw(vm))
return false;
return ipa - pkvm->pvmfw_load_addr < pvmfw_size;
}
int pkvm_load_pvmfw_pages(struct kvm_shadow_vm *vm, u64 ipa, phys_addr_t phys,
u64 size);
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */

View File

@@ -28,9 +28,17 @@ typedef union hyp_spinlock {
};
} hyp_spinlock_t;
#define __HYP_SPIN_LOCK_INITIALIZER \
{ .__val = 0 }
#define __HYP_SPIN_LOCK_UNLOCKED \
((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER)
#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED
#define hyp_spin_lock_init(l) \
do { \
*(l) = (hyp_spinlock_t){ .__val = 0 }; \
*(l) = __HYP_SPIN_LOCK_UNLOCKED; \
} while (0)
static inline void hyp_spin_lock(hyp_spinlock_t *lock)

View File

@@ -15,6 +15,4 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */

View File

@@ -11,3 +11,13 @@ SYM_FUNC_START_PI(dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(dcache_clean_inval_poc)
SYM_FUNC_START_PI(icache_inval_pou)
alternative_if ARM64_HAS_CACHE_DIC
isb
ret
alternative_else_nop_endif
invalidate_icache_by_line x0, x1, x2, x3
ret
SYM_FUNC_END_PI(icache_inval_pou)

View File

@@ -4,6 +4,8 @@
* Author: Andrew Scull <ascull@google.com>
*/
#include <kvm/arm_hypercalls.h>
#include <hyp/adjust_pc.h>
#include <asm/pgtable-types.h>
@@ -13,28 +15,760 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <uapi/linux/psci.h>
#include "../../sys_regs.h"
struct pkvm_loaded_state {
/* loaded vcpu is HYP VA */
struct kvm_vcpu *vcpu;
bool is_protected;
/*
* Host FPSIMD state. Written to when the guest accesses its
* own FPSIMD state, and read when the guest state is live and
* that it needs to be switched back to the host.
*
* Only valid when the KVM_ARM64_FP_ENABLED flag is set in the
* shadow structure.
*/
struct user_fpsimd_state host_fpsimd_state;
};
static DEFINE_PER_CPU(struct pkvm_loaded_state, loaded_state);
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
struct kvm_iommu_ops kvm_iommu_ops;
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
typedef void (*shadow_entry_exit_handler_fn)(struct kvm_vcpu *, struct kvm_vcpu *);
static void handle_pvm_entry_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
shadow_vcpu->arch.flags |= host_vcpu->arch.flags & KVM_ARM64_INCREMENT_PC;
}
static int pkvm_refill_memcache(struct kvm_vcpu *shadow_vcpu,
struct kvm_vcpu *host_vcpu)
{
u64 nr_pages;
nr_pages = VTCR_EL2_LVLS(shadow_vcpu->arch.pkvm.shadow_vm->arch.vtcr) - 1;
return refill_memcache(&shadow_vcpu->arch.pkvm_memcache, nr_pages,
&host_vcpu->arch.pkvm_memcache);
}
static void handle_pvm_entry_psci(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
u32 psci_fn = smccc_get_function(shadow_vcpu);
u64 ret = vcpu_get_reg(host_vcpu, 0);
switch (psci_fn) {
case PSCI_0_2_FN_CPU_ON:
case PSCI_0_2_FN64_CPU_ON:
/*
* Check whether the cpu_on request to the host was successful.
* If not, reset the vcpu state from ON_PENDING to OFF.
* This could happen if this vcpu attempted to turn on the other
* vcpu while the other one is in the process of turning itself
* off.
*/
if (ret != PSCI_RET_SUCCESS) {
struct kvm_shadow_vm *vm = shadow_vcpu->arch.pkvm.shadow_vm;
unsigned long cpu_id = smccc_get_arg1(shadow_vcpu);
struct kvm_vcpu *vcpu = pvm_mpidr_to_vcpu(vm, cpu_id);
if (vcpu && READ_ONCE(vcpu->arch.pkvm.power_state) == PSCI_0_2_AFFINITY_LEVEL_ON_PENDING)
WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_OFF);
ret = PSCI_RET_INTERNAL_FAILURE;
}
break;
default:
break;
}
vcpu_set_reg(shadow_vcpu, 0, ret);
}
static void handle_pvm_entry_hvc64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
u32 fn = smccc_get_function(shadow_vcpu);
switch (fn) {
case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
pkvm_refill_memcache(shadow_vcpu, host_vcpu);
break;
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
fallthrough;
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
vcpu_set_reg(shadow_vcpu, 0, SMCCC_RET_SUCCESS);
break;
default:
handle_pvm_entry_psci(host_vcpu, shadow_vcpu);
break;
}
}
static void handle_pvm_entry_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
unsigned long host_flags;
host_flags = READ_ONCE(host_vcpu->arch.flags);
/* Exceptions have priority on anything else */
if (host_flags & KVM_ARM64_PENDING_EXCEPTION) {
/* Exceptions caused by this should be undef exceptions. */
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_MASK);
shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_EXCEPT_AA64_EL1);
return;
}
if (host_flags & KVM_ARM64_INCREMENT_PC) {
shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_MASK);
shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
}
if (!esr_sys64_to_params(shadow_vcpu->arch.fault.esr_el2).is_write) {
/* r0 as transfer register between the guest and the host. */
u64 rt_val = vcpu_get_reg(host_vcpu, 0);
int rt = kvm_vcpu_sys_get_rt(shadow_vcpu);
vcpu_set_reg(shadow_vcpu, rt, rt_val);
}
}
static void handle_pvm_entry_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
unsigned long cpsr = *vcpu_cpsr(shadow_vcpu);
unsigned long host_flags;
u32 esr = ESR_ELx_IL;
host_flags = READ_ONCE(host_vcpu->arch.flags);
if (!(host_flags & KVM_ARM64_PENDING_EXCEPTION))
return;
/*
* If the host wants to inject an exception, get syndrom and
* fault address.
*/
if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT);
else
esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
esr |= ESR_ELx_FSC_EXTABT;
__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
__vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu);
/* Tell the run loop that we want to inject something */
shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_MASK);
shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
KVM_ARM64_EXCEPT_AA64_EL1);
}
/*
 * Prepare re-entry into a protected guest after a data abort exit.
 * A host-requested exception takes priority and cancels any in-flight
 * MMIO. Otherwise, propagate the PC increment and, for an MMIO read,
 * copy the data the host returned in r0 into the guest's destination
 * register.
 */
static void handle_pvm_entry_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	unsigned long host_flags;
	bool rd_update;

	host_flags = READ_ONCE(host_vcpu->arch.flags);

	/* Exceptions have priority over anything else */
	if (host_flags & KVM_ARM64_PENDING_EXCEPTION) {
		unsigned long cpsr = *vcpu_cpsr(shadow_vcpu);
		u32 esr = ESR_ELx_IL;

		/* Forge an external-abort DABT syndrome for the guest's EL. */
		if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
			esr |= (ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT);
		else
			esr |= (ESR_ELx_EC_DABT_CUR << ESR_ELx_EC_SHIFT);

		esr |= ESR_ELx_FSC_EXTABT;

		__vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr;
		__vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu);

		/* Tell the run loop that we want to inject something */
		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
					     KVM_ARM64_EXCEPT_MASK);
		shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION |
					    KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
					    KVM_ARM64_EXCEPT_AA64_EL1);

		/* Cancel potential in-flight MMIO */
		shadow_vcpu->mmio_needed = false;
		return;
	}

	/* Handle PC increment on MMIO */
	if ((host_flags & KVM_ARM64_INCREMENT_PC) && shadow_vcpu->mmio_needed) {
		shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
					     KVM_ARM64_EXCEPT_MASK);
		shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
	}

	/* If we were doing an MMIO read access, update the register */
	rd_update = (shadow_vcpu->mmio_needed &&
		     (host_flags & KVM_ARM64_INCREMENT_PC));
	rd_update &= !kvm_vcpu_dabt_iswrite(shadow_vcpu);

	if (rd_update) {
		/* r0 as transfer register between the guest and the host. */
		u64 rd_val = vcpu_get_reg(host_vcpu, 0);
		int rd = kvm_vcpu_dabt_get_rd(shadow_vcpu);

		vcpu_set_reg(shadow_vcpu, rd, rd_val);
	}

	/* The MMIO transaction (if any) is now fully consumed. */
	shadow_vcpu->mmio_needed = false;
}
/*
 * On a WFI/WFE exit from a protected guest, expose to the host only the
 * mode bits of PSTATE plus the exit syndrome — no other guest state.
 */
static void handle_pvm_exit_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	host_vcpu->arch.ctxt.regs.pstate = shadow_vcpu->arch.ctxt.regs.pstate &
		PSR_MODE_MASK;
	host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2;
}
/*
 * On a sysreg-trap exit from a protected guest, sanitise what the host
 * sees: strip the Rt field from the syndrome and expose only the mode
 * bits of PSTATE. For a write, the value being written is handed over
 * via r0 rather than the guest's real Rt register.
 */
static void handle_pvm_exit_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	u32 esr_el2 = shadow_vcpu->arch.fault.esr_el2;

	/* r0 as transfer register between the guest and the host. */
	WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
		   esr_el2 & ~ESR_ELx_SYS64_ISS_RT_MASK);

	/* The mode is required for the host to emulate some sysregs */
	WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate,
		   shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK);

	if (esr_sys64_to_params(esr_el2).is_write) {
		int rt = kvm_vcpu_sys_get_rt(shadow_vcpu);
		u64 rt_val = vcpu_get_reg(shadow_vcpu, rt);

		vcpu_set_reg(host_vcpu, 0, rt_val);
	}
}
/*
 * On an HVC exit from a protected guest, forward to the host only the
 * function ID plus however many argument registers that particular
 * SMCCC/PSCI call legitimately requires (n counts the function ID in
 * r0). Any HVC not listed here should have been handled or blocked at
 * hyp, so falling through to default is a bug.
 */
static void handle_pvm_exit_hvc64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	int n, i;

	switch (smccc_get_function(shadow_vcpu)) {
	/*
	 * CPU_ON takes 3 arguments, however, to wake up the target vcpu the
	 * host only needs to know the target's cpu_id, which is passed as the
	 * first argument. The processing of the reset state is done at hyp.
	 */
	case PSCI_0_2_FN_CPU_ON:
	case PSCI_0_2_FN64_CPU_ON:
		n = 2;
		break;

	case PSCI_0_2_FN_CPU_OFF:
	case PSCI_0_2_FN_SYSTEM_OFF:
	case PSCI_0_2_FN_CPU_SUSPEND:
	case PSCI_0_2_FN64_CPU_SUSPEND:
		n = 1;
		break;

	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
		fallthrough;
	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
		n = 4;
		break;

	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
		n = 3;
		break;

	/*
	 * The rest are either blocked or handled by HYP, so we should
	 * really never be here.
	 */
	default:
		BUG();
	}

	host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2;

	/* Pass the hvc function id (r0) as well as any potential arguments. */
	for (i = 0; i < n; i++)
		vcpu_set_reg(host_vcpu, i, vcpu_get_reg(shadow_vcpu, i));
}
/*
 * On an instruction abort exit from a protected guest, the host only
 * needs the syndrome and the faulting IPA (HPFAR_EL2) to resolve the
 * fault; nothing else is copied out.
 */
static void handle_pvm_exit_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
		   shadow_vcpu->arch.fault.esr_el2);
	WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2,
		   shadow_vcpu->arch.fault.hpfar_el2);
}
/*
 * On a data abort exit from a protected guest, decide (via the MMIO
 * guard check) whether this is a sanctioned MMIO access. If it is,
 * strip the SRT field from the syndrome and, for writes, pass the data
 * via r0. If it is not, clear ISV so the host cannot interpret the
 * access. Either way, expose only the PSTATE mode bits, a masked FAR,
 * the faulting IPA, and the SCTLR_EL1 endianness bits the host needs
 * to decode the access.
 */
static void handle_pvm_exit_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	/* Only faults on guard-registered MMIO pages become host MMIO. */
	shadow_vcpu->mmio_needed = __pkvm_check_ioguard_page(shadow_vcpu);

	if (shadow_vcpu->mmio_needed) {
		/* r0 as transfer register between the guest and the host. */
		WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
			   shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_SRT_MASK);

		if (kvm_vcpu_dabt_iswrite(shadow_vcpu)) {
			int rt = kvm_vcpu_dabt_get_rd(shadow_vcpu);
			u64 rt_val = vcpu_get_reg(shadow_vcpu, rt);

			vcpu_set_reg(host_vcpu, 0, rt_val);
		}
	} else {
		/* Not sanctioned MMIO: hide the syndrome validity bit. */
		WRITE_ONCE(host_vcpu->arch.fault.esr_el2,
			   shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_ISV);
	}

	WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate,
		   shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK);
	WRITE_ONCE(host_vcpu->arch.fault.far_el2,
		   shadow_vcpu->arch.fault.far_el2 & FAR_MASK);
	WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2,
		   shadow_vcpu->arch.fault.hpfar_el2);
	/* Endianness bits only; needed by the host to decode the access. */
	WRITE_ONCE(__vcpu_sys_reg(host_vcpu, SCTLR_EL1),
		   __vcpu_sys_reg(shadow_vcpu, SCTLR_EL1) & (SCTLR_ELx_EE | SCTLR_EL1_E0E));
}
/*
 * Default entry fixup for non-protected guests: mirror the host's
 * pending-exception or PC-increment request into the shadow vcpu.
 * An exception request wins over a PC increment.
 */
static void handle_vm_entry_generic(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	unsigned long host_flags = READ_ONCE(host_vcpu->arch.flags);

	shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
				     KVM_ARM64_EXCEPT_MASK);

	if (host_flags & KVM_ARM64_PENDING_EXCEPTION) {
		shadow_vcpu->arch.flags |= KVM_ARM64_PENDING_EXCEPTION;
		shadow_vcpu->arch.flags |= host_flags & KVM_ARM64_EXCEPT_MASK;
	} else if (host_flags & KVM_ARM64_INCREMENT_PC) {
		shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
	}
}
/* Default exit fixup for non-protected guests: forward the syndrome. */
static void handle_vm_exit_generic(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2;
}
/*
 * Abort exit fixup for non-protected guests: copy the whole fault
 * record (syndrome, FAR, HPFAR, ...) to the host vcpu.
 */
static void handle_vm_exit_abt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
{
	host_vcpu->arch.fault = shadow_vcpu->arch.fault;
}
/* Protected guests: per-exception-class fixups applied on guest entry. */
static const shadow_entry_exit_handler_fn entry_pvm_shadow_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= NULL,
	[ESR_ELx_EC_WFx]		= handle_pvm_entry_wfx,
	[ESR_ELx_EC_HVC64]		= handle_pvm_entry_hvc64,
	[ESR_ELx_EC_SYS64]		= handle_pvm_entry_sys64,
	[ESR_ELx_EC_IABT_LOW]		= handle_pvm_entry_iabt,
	[ESR_ELx_EC_DABT_LOW]		= handle_pvm_entry_dabt,
};
/* Protected guests: per-exception-class fixups applied on guest exit. */
static const shadow_entry_exit_handler_fn exit_pvm_shadow_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= NULL,
	[ESR_ELx_EC_WFx]		= handle_pvm_exit_wfx,
	[ESR_ELx_EC_HVC64]		= handle_pvm_exit_hvc64,
	[ESR_ELx_EC_SYS64]		= handle_pvm_exit_sys64,
	[ESR_ELx_EC_IABT_LOW]		= handle_pvm_exit_iabt,
	[ESR_ELx_EC_DABT_LOW]		= handle_pvm_exit_dabt,
};
/* Non-protected guests: the generic fixup handles every entry class. */
static const shadow_entry_exit_handler_fn entry_vm_shadow_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= handle_vm_entry_generic,
};
/* Non-protected guests: generic exit fixup, full fault copy on aborts. */
static const shadow_entry_exit_handler_fn exit_vm_shadow_handlers[] = {
	[0 ... ESR_ELx_EC_MAX]		= handle_vm_exit_generic,
	[ESR_ELx_EC_IABT_LOW]		= handle_vm_exit_abt,
	[ESR_ELx_EC_DABT_LOW]		= handle_vm_exit_abt,
};
/*
 * Copy the host-maintained vGICv3 CPU interface state into the shadow
 * vcpu before running it. used_lrs is clamped to the number of list
 * registers the hardware actually implements (ICH_VTR_EL2.ListRegs + 1)
 * so a malicious host value cannot overrun the shadow LR array.
 */
static void flush_vgic_state(struct kvm_vcpu *host_vcpu,
			     struct kvm_vcpu *shadow_vcpu)
{
	struct vgic_v3_cpu_if *host_cpu_if, *shadow_cpu_if;
	unsigned int used_lrs, max_lrs, i;

	host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
	shadow_cpu_if = &shadow_vcpu->arch.vgic_cpu.vgic_v3;

	max_lrs = (read_gicreg(ICH_VTR_EL2) & 0xf) + 1;
	used_lrs = READ_ONCE(host_cpu_if->used_lrs);
	used_lrs = min(used_lrs, max_lrs);

	shadow_cpu_if->vgic_hcr = host_cpu_if->vgic_hcr;
	/* Should be a one-off */
	shadow_cpu_if->vgic_sre = (ICC_SRE_EL1_DIB |
				   ICC_SRE_EL1_DFB |
				   ICC_SRE_EL1_SRE);
	shadow_cpu_if->used_lrs = used_lrs;

	for (i = 0; i < used_lrs; i++)
		shadow_cpu_if->vgic_lr[i] = host_cpu_if->vgic_lr[i];
}
/*
 * Copy the vGICv3 state back to the host after a run: the hypervisor
 * control register and the list registers that were in use.
 */
static void sync_vgic_state(struct kvm_vcpu *host_vcpu,
			    struct kvm_vcpu *shadow_vcpu)
{
	struct vgic_v3_cpu_if *host_cpu_if, *shadow_cpu_if;
	unsigned int i;

	host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
	shadow_cpu_if = &shadow_vcpu->arch.vgic_cpu.vgic_v3;

	host_cpu_if->vgic_hcr = shadow_cpu_if->vgic_hcr;

	for (i = 0; i < shadow_cpu_if->used_lrs; i++)
		host_cpu_if->vgic_lr[i] = shadow_cpu_if->vgic_lr[i];
}
/*
 * Program the hyp-owned vtimer state for a protected vcpu before it
 * runs: zero the virtual counter offset (so vtime == ptime) and load
 * the saved CNTV compare/control values. Non-protected vcpus keep the
 * host-managed timer state untouched.
 */
static void flush_timer_state(struct pkvm_loaded_state *state)
{
	struct kvm_vcpu *shadow_vcpu = state->vcpu;

	if (!state->is_protected)
		return;

	/*
	 * A shadow vcpu has no offset, and sees vtime == ptime. The
	 * ptimer is fully emulated by EL1 and cannot be trusted.
	 */
	write_sysreg(0, cntvoff_el2);
	isb();	/* make the zero offset visible before restoring CNTV regs */
	write_sysreg_el0(__vcpu_sys_reg(shadow_vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
	write_sysreg_el0(__vcpu_sys_reg(shadow_vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
}
/*
 * Save the protected vcpu's vtimer compare/control registers after a
 * run so the hyp copy stays authoritative. No-op for non-protected
 * vcpus.
 */
static void sync_timer_state(struct pkvm_loaded_state *state)
{
	struct kvm_vcpu *shadow_vcpu = state->vcpu;

	if (!state->is_protected)
		return;

	/*
	 * Preserve the vtimer state so that it is always correct,
	 * even if the host tries to make a mess.
	 */
	__vcpu_sys_reg(shadow_vcpu, CNTV_CVAL_EL0) = read_sysreg_el0(SYS_CNTV_CVAL);
	__vcpu_sys_reg(shadow_vcpu, CNTV_CTL_EL0) = read_sysreg_el0(SYS_CNTV_CTL);
}
/*
 * Bulk-copy one vcpu's architectural state (GPRs, banked SPSRs and
 * system registers) into another, skipping the timer sysregs which are
 * handled directly by EL1. Used in both directions between the host
 * vcpu and its shadow for non-protected guests.
 */
static void __sync_vcpu_state(struct kvm_vcpu *from_vcpu,
			      struct kvm_vcpu *to_vcpu)
{
	int i;

	to_vcpu->arch.ctxt.regs	= from_vcpu->arch.ctxt.regs;
	to_vcpu->arch.ctxt.spsr_abt	= from_vcpu->arch.ctxt.spsr_abt;
	to_vcpu->arch.ctxt.spsr_und	= from_vcpu->arch.ctxt.spsr_und;
	to_vcpu->arch.ctxt.spsr_irq	= from_vcpu->arch.ctxt.spsr_irq;
	to_vcpu->arch.ctxt.spsr_fiq	= from_vcpu->arch.ctxt.spsr_fiq;

	/*
	 * Copy the sysregs, but don't mess with the timer state which
	 * is directly handled by EL1 and is expected to be preserved.
	 */
	/* NOTE(review): loop starts at 1, presumably skipping the invalid
	 * sysreg slot at index 0 — confirm against the NR_SYS_REGS enum. */
	for (i = 1; i < NR_SYS_REGS; i++) {
		if (i >= CNTVOFF_EL2 && i <= CNTP_CTL_EL0)
			continue;
		to_vcpu->arch.ctxt.sys_regs[i] = from_vcpu->arch.ctxt.sys_regs[i];
	}
}
/*
 * Bring the shadow vcpu up to date just before running it: perform any
 * pending PSCI reset, copy dirty host state (non-protected only),
 * flush vGIC and timer state, then run the per-exception-class entry
 * fixup that matches how the previous run exited.
 */
static void flush_shadow_state(struct pkvm_loaded_state *state)
{
	struct kvm_vcpu *shadow_vcpu = state->vcpu;
	struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu;
	u8 esr_ec;
	shadow_entry_exit_handler_fn ec_handler;

	/* A vcpu woken by PSCI CPU_ON needs its reset applied first. */
	if (READ_ONCE(shadow_vcpu->arch.pkvm.power_state) == PSCI_0_2_AFFINITY_LEVEL_ON_PENDING)
		pkvm_reset_vcpu(shadow_vcpu);

	/*
	 * If we deal with a non-protected guest and that the state is
	 * dirty (from a host perspective), copy the state back into
	 * the shadow.
	 */
	if (!state->is_protected) {
		if (READ_ONCE(host_vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY)
			__sync_vcpu_state(host_vcpu, shadow_vcpu);

		/* Start from guest defaults, then merge the host's HCR bits. */
		state->vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS & ~(HCR_RW | HCR_TWI | HCR_TWE);
		state->vcpu->arch.hcr_el2 |= host_vcpu->arch.hcr_el2;
	}

	flush_vgic_state(host_vcpu, shadow_vcpu);
	flush_timer_state(state);

	switch (ARM_EXCEPTION_CODE(shadow_vcpu->arch.pkvm.exit_code)) {
	case ARM_EXCEPTION_IRQ:
	case ARM_EXCEPTION_EL1_SERROR:
	case ARM_EXCEPTION_IL:
		break;
	case ARM_EXCEPTION_TRAP:
		esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu));
		if (state->is_protected)
			ec_handler = entry_pvm_shadow_handlers[esr_ec];
		else
			ec_handler = entry_vm_shadow_handlers[esr_ec];

		if (ec_handler)
			ec_handler(host_vcpu, shadow_vcpu);

		break;
	default:
		BUG();
	}

	/* The previous exit has been fully consumed. */
	shadow_vcpu->arch.pkvm.exit_code = 0;
}
/*
 * Propagate state back to the host vcpu after a run: vGIC and timer
 * state always, plus the per-exception-class exit fixup for traps.
 * GPR/sysreg state is deliberately left in the shadow until explicitly
 * requested. The exit reason is stashed so the next flush can pick the
 * matching entry handler.
 */
static void sync_shadow_state(struct pkvm_loaded_state *state, u32 exit_reason)
{
	struct kvm_vcpu *shadow_vcpu = state->vcpu;
	struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu;
	u8 esr_ec;
	shadow_entry_exit_handler_fn ec_handler;

	/*
	 * Don't sync the vcpu GPR/sysreg state after a run. Instead,
	 * leave it in the shadow until someone actually requires it.
	 */
	sync_vgic_state(host_vcpu, shadow_vcpu);
	sync_timer_state(state);

	switch (ARM_EXCEPTION_CODE(exit_reason)) {
	case ARM_EXCEPTION_IRQ:
		break;
	case ARM_EXCEPTION_TRAP:
		esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu));
		if (state->is_protected)
			ec_handler = exit_pvm_shadow_handlers[esr_ec];
		else
			ec_handler = exit_vm_shadow_handlers[esr_ec];

		if (ec_handler)
			ec_handler(host_vcpu, shadow_vcpu);

		break;
	case ARM_EXCEPTION_EL1_SERROR:
	case ARM_EXCEPTION_IL:
		break;
	default:
		BUG();
	}

	/* The host's injection/increment requests have been consumed. */
	host_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | KVM_ARM64_INCREMENT_PC);
	shadow_vcpu->arch.pkvm.exit_code = exit_reason;
}
/*
 * Give FP/SIMD (and SVE) back to the host: disable the EL2 traps and,
 * under pKVM, save the currently-loaded guest FP state into its shadow
 * vcpu before restoring the host's FP state. Also resets the SVE
 * vector length for EL2 when SVE is present.
 */
static void fpsimd_host_restore(void)
{
	/* Stop trapping FP/SVE accesses at EL2. */
	sysreg_clear_set(cptr_el2, CPTR_EL2_TZ | CPTR_EL2_TFP, 0);
	isb();

	if (unlikely(is_protected_kvm_enabled())) {
		struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);

		/* Guest FP regs are live: save them, then reload the host's. */
		__fpsimd_save_state(&state->vcpu->arch.ctxt.fp_regs);
		__fpsimd_restore_state(&state->host_fpsimd_state);

		state->vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
		state->vcpu->arch.flags |= KVM_ARM64_FP_HOST;
	}

	if (system_supports_sve())
		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
}
/*
 * Hypercall: bind a host vcpu's shadow to this physical CPU. Silently
 * ignores the request when protected mode is off, when a vcpu is
 * already loaded, or when the shadow lookup fails. For protected vcpus,
 * only the host's WFI/WFE trap preferences are honoured and pointer
 * authentication is trapped.
 */
static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
	struct pkvm_loaded_state *state;
	int handle;

	/* Why did you bother? */
	if (!is_protected_kvm_enabled())
		return;

	state = this_cpu_ptr(&loaded_state);

	/* Nice try */
	if (state->vcpu)
		return;

	vcpu = kern_hyp_va(vcpu);

	handle = READ_ONCE(vcpu->arch.pkvm.shadow_handle);
	state->vcpu = get_shadow_vcpu(handle, vcpu->vcpu_idx);
	if (!state->vcpu)
		return;

	state->is_protected = state->vcpu->arch.pkvm.shadow_vm->arch.pkvm.enabled;

	/* Host FP state will be saved into this slot on first guest FP use. */
	state->vcpu->arch.host_fpsimd_state = &state->host_fpsimd_state;
	state->vcpu->arch.flags |= KVM_ARM64_FP_HOST;

	if (state->is_protected) {
		/* Propagate WFx trapping flags, trap ptrauth */
		state->vcpu->arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI |
					       HCR_API | HCR_APK);
		state->vcpu->arch.hcr_el2 |= vcpu->arch.hcr_el2 & (HCR_TWE |
								   HCR_TWI);
	}
}
/*
 * Hypercall: unbind the loaded shadow vcpu from this physical CPU,
 * restoring host FP state if the guest used FP and syncing state back
 * to the host vcpu for non-protected (non-dirty) guests.
 *
 * NOTE(review): the __kvm_vcpu_run() call below looks out of place in a
 * "put" handler and may be an artifact of this rendered diff — verify
 * against the original hyp-main.c before relying on it.
 */
static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);

	cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
	if (unlikely(is_protected_kvm_enabled())) {
		struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);

		vcpu = kern_hyp_va(vcpu);

		if (state->vcpu && state->vcpu->arch.pkvm.host_vcpu == vcpu) {
			if (state->vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
				fpsimd_host_restore();

			if (!state->is_protected &&
			    !(READ_ONCE(vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY))
				__sync_vcpu_state(state->vcpu, vcpu);

			put_shadow_vcpu(state->vcpu);

			/* "It's over and done with..." */
			state->vcpu = NULL;
		}
	}
}
/*
 * Hypercall: copy the loaded shadow vcpu's state out to the host vcpu.
 * Refused (silently) for protected guests, when nothing is loaded, or
 * when the named vcpu is not the one currently loaded.
 */
static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);

	if (unlikely(is_protected_kvm_enabled())) {
		struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);

		vcpu = kern_hyp_va(vcpu);

		if (!state->vcpu || state->is_protected ||
		    state->vcpu->arch.pkvm.host_vcpu != vcpu)
			return;

		__sync_vcpu_state(state->vcpu, vcpu);
	}
}
/*
 * Hypercall: run a vcpu. Under pKVM this runs the loaded shadow vcpu
 * with flush/sync fixups around it, and re-arms the EL2 FP/SVE traps
 * if the guest touched FP so host accesses fault back into hyp.
 * Without pKVM, the host vcpu is run directly. Returns the exit code
 * in r1.
 */
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
	int ret;

	if (unlikely(is_protected_kvm_enabled())) {
		struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);

		flush_shadow_state(state);

		ret = __kvm_vcpu_run(state->vcpu);

		sync_shadow_state(state, ret);

		if (state->vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
			/*
			 * The guest has used the FP, trap all accesses
			 * from the host (both FP and SVE).
			 */
			u64 reg = CPTR_EL2_TFP;
			if (system_supports_sve())
				reg |= CPTR_EL2_TZ;

			sysreg_clear_set(cptr_el2, 0, reg);
		}
	} else {
		ret = __kvm_vcpu_run(kern_hyp_va(vcpu));
	}

	cpu_reg(host_ctxt, 1) = ret;
}
/*
 * Hypercall: transfer a host page (pfn) into the loaded guest at gfn —
 * a full donation for protected guests, a share for non-protected
 * ones. The shadow's stage-2 memcache is topped up from the host's
 * first so the mapping cannot fail for lack of page-table pages.
 * Returns 0 or a negative error code in r1.
 */
static void handle___pkvm_host_donate_guest(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(u64, pfn, host_ctxt, 1);
	DECLARE_REG(u64, gfn, host_ctxt, 2);
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 3);
	struct pkvm_loaded_state *state;
	int ret = -EINVAL;

	if (!is_protected_kvm_enabled())
		goto out;

	vcpu = kern_hyp_va(vcpu);

	state = this_cpu_ptr(&loaded_state);
	if (!state->vcpu)
		goto out;

	/* Topup shadow memcache with the host's */
	ret = pkvm_refill_memcache(state->vcpu, vcpu);
	if (!ret) {
		if (state->is_protected)
			ret = __pkvm_host_donate_guest(pfn, gfn, state->vcpu);
		else
			ret = __pkvm_host_share_guest(pfn, gfn, state->vcpu);
	}

out:
	cpu_reg(host_ctxt, 1) = ret;
}
/*
 * Hypercall: apply any pending PC adjustment (exception entry or step
 * over an emulated instruction) to a vcpu. Under pKVM this is refused
 * for protected vcpus, whose PC can never be updated from EL1.
 *
 * NOTE(review): __kvm_adjust_pc() appears to be called twice here (once
 * before and once after the pKVM check); this looks like interleaved
 * removed/added lines from the rendered diff — verify against the
 * original hyp-main.c.
 */
static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);

	__kvm_adjust_pc(kern_hyp_va(vcpu));
	vcpu = kern_hyp_va(vcpu);

	if (unlikely(is_protected_kvm_enabled())) {
		struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);

		/*
		 * A shadow vcpu can never be updated from EL1, and we
		 * must have a vcpu loaded when protected mode is
		 * enabled.
		 */
		if (!state->vcpu || state->is_protected)
			return;
	}

	__kvm_adjust_pc(vcpu);
}
static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
@@ -84,16 +818,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
}
static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
}
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
@@ -104,18 +828,68 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
}
static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
static struct vgic_v3_cpu_if *get_shadow_vgic_v3_cpu_if(struct vgic_v3_cpu_if *cpu_if)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
if (unlikely(is_protected_kvm_enabled())) {
struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state);
struct kvm_vcpu *host_vcpu;
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
if (!state->vcpu)
return NULL;
host_vcpu = state->vcpu->arch.pkvm.host_vcpu;
if (&host_vcpu->arch.vgic_cpu.vgic_v3 != cpu_if)
return NULL;
}
return cpu_if;
}
static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
struct vgic_v3_cpu_if *shadow_cpu_if;
__vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
cpu_if = kern_hyp_va(cpu_if);
shadow_cpu_if = get_shadow_vgic_v3_cpu_if(cpu_if);
__vgic_v3_save_vmcr_aprs(shadow_cpu_if);
if (cpu_if != shadow_cpu_if) {
int i;
cpu_if->vgic_vmcr = shadow_cpu_if->vgic_vmcr;
for (i = 0; i < ARRAY_SIZE(cpu_if->vgic_ap0r); i++) {
cpu_if->vgic_ap0r[i] = shadow_cpu_if->vgic_ap0r[i];
cpu_if->vgic_ap1r[i] = shadow_cpu_if->vgic_ap1r[i];
}
}
}
static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
struct vgic_v3_cpu_if *shadow_cpu_if;
cpu_if = kern_hyp_va(cpu_if);
shadow_cpu_if = get_shadow_vgic_v3_cpu_if(cpu_if);
if (cpu_if != shadow_cpu_if) {
int i;
shadow_cpu_if->vgic_vmcr = cpu_if->vgic_vmcr;
/* Should be a one-off */
shadow_cpu_if->vgic_sre = (ICC_SRE_EL1_DIB |
ICC_SRE_EL1_DFB |
ICC_SRE_EL1_SRE);
for (i = 0; i < ARRAY_SIZE(cpu_if->vgic_ap0r); i++) {
shadow_cpu_if->vgic_ap0r[i] = cpu_if->vgic_ap0r[i];
shadow_cpu_if->vgic_ap1r[i] = cpu_if->vgic_ap1r[i];
}
}
__vgic_v3_restore_vmcr_aprs(shadow_cpu_if);
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -157,6 +931,13 @@ static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
}
static void handle___pkvm_host_reclaim_page(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_reclaim_page(pfn);
}
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -171,11 +952,22 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
static void handle___pkvm_init_shadow(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
DECLARE_REG(void *, host_shadow_va, host_ctxt, 2);
DECLARE_REG(size_t, shadow_size, host_ctxt, 3);
DECLARE_REG(void *, pgd, host_ctxt, 4);
__pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
cpu_reg(host_ctxt, 1) = __pkvm_init_shadow(host_kvm, host_shadow_va,
shadow_size, pgd);
}
static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_teardown_shadow(host_kvm);
}
typedef void (*hcall_t)(struct kvm_cpu_context *);
@@ -195,6 +987,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__pkvm_host_reclaim_page),
HANDLE_FUNC(__pkvm_host_donate_guest),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
@@ -202,11 +996,13 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),
HANDLE_FUNC(__vgic_v3_write_vmcr),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_init_shadow),
HANDLE_FUNC(__pkvm_teardown_shadow),
HANDLE_FUNC(__pkvm_vcpu_load),
HANDLE_FUNC(__pkvm_vcpu_put),
HANDLE_FUNC(__pkvm_vcpu_sync_state),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
@@ -274,10 +1070,9 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_FP_ASIMD:
case ESR_ELx_EC_SVE:
sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
fpsimd_host_restore();
break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:

View File

@@ -13,6 +13,7 @@
#include <linux/arm-smccc.h>
#include <nvhe/iommu.h>
#include <nvhe/memory.h>
#include <nvhe/mm.h>
#include <nvhe/spinlock.h>
@@ -113,10 +114,26 @@ static void __set_control_regs(struct s2mpu *dev)
writel_relaxed(ctrl0, dev->va + REG_NS_CTRL0);
}
/* Poll the given SFR as long as its value has all bits of a given mask set. */
static void __wait_while(void __iomem *addr, u32 mask)
{
while ((readl_relaxed(addr) & mask) == mask)
continue;
}
static void __wait_for_invalidation_complete(struct s2mpu *dev)
{
/* Must not access SFRs while S2MPU is busy invalidating (v9 only). */
if (is_version(dev, S2MPU_VERSION_9)) {
__wait_while(dev->va + REG_NS_STATUS,
STATUS_BUSY | STATUS_ON_INVALIDATING);
}
}
static void __all_invalidation(struct s2mpu *dev)
{
writel_relaxed(INVALIDATION_INVALIDATE,
dev->va + REG_NS_ALL_INVALIDATION);
writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_ALL_INVALIDATION);
__wait_for_invalidation_complete(dev);
}
static void __range_invalidation(struct s2mpu *dev, phys_addr_t first_byte,
@@ -128,6 +145,7 @@ static void __range_invalidation(struct s2mpu *dev, phys_addr_t first_byte,
writel_relaxed(start_ppn, dev->va + REG_NS_RANGE_INVALIDATION_START_PPN);
writel_relaxed(end_ppn, dev->va + REG_NS_RANGE_INVALIDATION_END_PPN);
writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_RANGE_INVALIDATION);
__wait_for_invalidation_complete(dev);
}
static void __set_l1entry_attr_with_prot(struct s2mpu *dev, unsigned int gb,
@@ -157,7 +175,7 @@ static void __set_l1entry_l2table_addr(struct s2mpu *dev, unsigned int gb,
dev->va + REG_NS_L1ENTRY_L2TABLE_ADDR(vid, gb));
}
/**
/*
* Initialize S2MPU device and set all GB regions to 1G granularity with
* given protection bits.
*/
@@ -176,7 +194,7 @@ static void initialize_with_prot(struct s2mpu *dev, enum mpt_prot prot)
__set_control_regs(dev);
}
/**
/*
* Initialize S2MPU device, set L2 table addresses and configure L1TABLE_ATTR
* registers according to the given MPT struct.
*/
@@ -199,7 +217,7 @@ static void initialize_with_mpt(struct s2mpu *dev, struct mpt *mpt)
__set_control_regs(dev);
}
/**
/*
* Set MPT protection bits set to 'prot' in the give byte range (page-aligned).
* Update currently powered S2MPUs.
*/
@@ -237,10 +255,13 @@ static void set_mpt_range_locked(struct mpt *mpt, phys_addr_t first_byte,
__range_invalidation(dev, first_byte, last_byte);
}
static void s2mpu_host_stage2_set_owner(phys_addr_t addr, size_t size, u8 owner_id)
static void s2mpu_host_stage2_set_owner(phys_addr_t addr, size_t size,
pkvm_id owner_id)
{
enum mpt_prot prot;
/* Grant access only to the default owner of the page table (ID=0). */
enum mpt_prot prot = owner_id ? MPT_PROT_NONE : MPT_PROT_RW;
prot = owner_id == pkvm_host_id ? MPT_PROT_RW : MPT_PROT_NONE;
/*
* NOTE: The following code refers to 'end' as the exclusive upper

File diff suppressed because it is too large Load Diff

View File

@@ -14,6 +14,7 @@
#include <nvhe/early_alloc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/spinlock.h>
@@ -24,6 +25,7 @@ struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
static u64 __io_map_base;
static DEFINE_PER_CPU(void *, hyp_fixmap_base);
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
@@ -37,6 +39,22 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
static unsigned long hyp_alloc_private_va_range(size_t size)
{
unsigned long addr = __io_map_base;
hyp_assert_lock_held(&pkvm_pgd_lock);
__io_map_base += PAGE_ALIGN(size);
/* Are we overflowing on the vmemmap ? */
if (__io_map_base > __hyp_vmemmap) {
__io_map_base = addr;
addr = (unsigned long)ERR_PTR(-ENOMEM);
}
return addr;
}
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot)
{
@@ -45,16 +63,10 @@ unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
hyp_spin_lock(&pkvm_pgd_lock);
size = PAGE_ALIGN(size + offset_in_page(phys));
addr = __io_map_base;
__io_map_base += size;
/* Are we overflowing on the vmemmap ? */
if (__io_map_base > __hyp_vmemmap) {
__io_map_base -= size;
addr = (unsigned long)ERR_PTR(-ENOMEM);
size = size + offset_in_page(phys);
addr = hyp_alloc_private_va_range(size);
if (IS_ERR((void *)addr))
goto out;
}
err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
if (err) {
@@ -105,13 +117,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return ret;
}
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
int hyp_back_vmemmap(phys_addr_t back)
{
unsigned long start, end;
unsigned long i, start, size, end = 0;
int ret;
hyp_vmemmap_range(phys, size, &start, &end);
for (i = 0; i < hyp_memblock_nr; i++) {
start = hyp_memory[i].base;
start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
/*
 * The beginning of the hyp_vmemmap region for the current
 * memblock may already be backed by the page backing the end
 * of the previous region, so avoid mapping it twice.
*/
start = max(start, end);
return __pkvm_create_mappings(start, end - start, back, PAGE_HYP);
end = hyp_memory[i].base + hyp_memory[i].size;
end = PAGE_ALIGN((u64)hyp_phys_to_page(end));
if (start >= end)
continue;
size = end - start;
ret = __pkvm_create_mappings(start, size, back, PAGE_HYP);
if (ret)
return ret;
memset(hyp_phys_to_virt(back), 0, size);
back += size;
}
return 0;
}
static void *__hyp_bp_vect_base;
@@ -163,6 +198,89 @@ int hyp_map_vectors(void)
return 0;
}
void *hyp_fixmap_map(phys_addr_t phys)
{
void *addr = *this_cpu_ptr(&hyp_fixmap_base);
int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE,
phys, PAGE_HYP);
return ret ? NULL : addr;
}
int hyp_fixmap_unmap(void)
{
void *addr = *this_cpu_ptr(&hyp_fixmap_base);
int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE);
return (ret != PAGE_SIZE) ? -EINVAL : 0;
}
static int __pin_pgtable_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
return -EINVAL;
hyp_page_ref_inc(hyp_virt_to_page(ptep));
return 0;
}
static int hyp_pin_pgtable_pages(u64 addr)
{
struct kvm_pgtable_walker walker = {
.cb = __pin_pgtable_cb,
.flags = KVM_PGTABLE_WALK_LEAF,
};
return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}
int hyp_create_pcpu_fixmap(void)
{
unsigned long i;
int ret = 0;
u64 addr;
hyp_spin_lock(&pkvm_pgd_lock);
for (i = 0; i < hyp_nr_cpus; i++) {
addr = hyp_alloc_private_va_range(PAGE_SIZE);
if (IS_ERR((void *)addr)) {
ret = -ENOMEM;
goto unlock;
}
/*
* Create a dummy mapping, to get the intermediate page-table
* pages allocated, then take a reference on the last level
* page to keep it around at all times.
*/
ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
__hyp_pa(__hyp_bss_start), PAGE_HYP);
if (ret) {
ret = -EINVAL;
goto unlock;
}
ret = hyp_pin_pgtable_pages(addr);
if (ret)
goto unlock;
ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, PAGE_SIZE);
if (ret != PAGE_SIZE) {
ret = -EINVAL;
goto unlock;
} else {
ret = 0;
}
*per_cpu_ptr(&hyp_fixmap_base, i) = (void *)addr;
}
unlock:
hyp_spin_unlock(&pkvm_pgd_lock);
return ret;
}
int hyp_create_idmap(u32 hyp_va_bits)
{
unsigned long start, end;
@@ -187,3 +305,30 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
if (!host_mc->nr_pages)
return NULL;
/*
* The host still owns the pages in its memcache, so we need to go
* through a full host-to-hyp donation cycle to change it. Fortunately,
* __pkvm_host_donate_hyp() takes care of races for us, so if it
* succeeds we're good to go.
*/
if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1))
return NULL;
return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
}
/* Refill our local memcache by popping pages from the one provided by the host. */
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc)
{
return __topup_hyp_memcache(mc, min_pages, admit_host_page,
hyp_virt_to_phys, host_mc);
}

View File

@@ -93,11 +93,15 @@ static inline struct hyp_page *node_to_page(struct list_head *node)
static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
phys_addr_t phys = hyp_page_to_phys(p);
unsigned short order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
if (phys < pool->range_start || phys >= pool->range_end)
goto insert;
/*
* Only the first struct hyp_page of a high-order page (otherwise known
* as the 'head') should have p->order set. The non-head pages should
@@ -116,6 +120,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
p = min(p, buddy);
}
insert:
/* Mark the new head, and insert it */
p->order = order;
page_add_to_list(p, &pool->free_area[order]);
@@ -144,25 +149,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
return p;
}
static inline void hyp_page_ref_inc(struct hyp_page *p)
{
BUG_ON(p->refcount == USHRT_MAX);
p->refcount++;
}
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
BUG_ON(!p->refcount);
p->refcount--;
return (p->refcount == 0);
}
static inline void hyp_set_page_refcounted(struct hyp_page *p)
{
BUG_ON(p->refcount);
p->refcount = 1;
}
static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
{
if (hyp_page_ref_dec_and_test(p))
@@ -249,10 +235,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
for (i = 0; i < nr_pages; i++) {
p[i].order = 0;
for (i = 0; i < nr_pages; i++)
hyp_set_page_refcounted(&p[i]);
}
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)

File diff suppressed because it is too large Load Diff

View File

@@ -11,15 +11,19 @@
#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/iommu.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
unsigned long hyp_nr_cpus;
phys_addr_t pvmfw_base;
phys_addr_t pvmfw_size;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
(unsigned long)__per_cpu_start)
@@ -31,16 +35,20 @@ static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
unsigned long vstart, vend, nr_pages;
unsigned long nr_pages;
hyp_early_alloc_init(virt, size);
hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend);
nr_pages = (vend - vstart) >> PAGE_SHIFT;
nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page));
vmemmap_base = hyp_early_alloc_contig(nr_pages);
if (!vmemmap_base)
return -ENOMEM;
nr_pages = hyp_shadow_table_pages(sizeof(struct kvm_shadow_vm));
shadow_table = hyp_early_alloc_contig(nr_pages);
if (!shadow_table)
return -ENOMEM;
nr_pages = hyp_s1_pgtable_pages();
hyp_pgt_base = hyp_early_alloc_contig(nr_pages);
if (!hyp_pgt_base)
@@ -78,7 +86,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base));
ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base));
if (ret)
return ret;
@@ -130,6 +138,13 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
start = hyp_phys_to_virt(pvmfw_base);
end = start + pvmfw_size;
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
ret = pkvm_create_mappings(start, end, prot);
if (ret)
return ret;
return 0;
}
@@ -161,12 +176,11 @@ static void hpool_put_page(void *addr)
hyp_put_page(&hpool, addr);
}
static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
static int fix_host_ownership_walker(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -175,15 +189,6 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
/*
* Fix-up the refcount for the page-table pages as the early allocator
* was unable to access the hyp_vmemmap and so the buddy allocator has
* initialised the refcount to '1'.
*/
mm_ops->get_page(ptep);
if (flag != KVM_PGTABLE_WALK_LEAF)
return 0;
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -212,12 +217,30 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
}
static int finalize_host_mappings(void)
static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
kvm_pte_t pte = *ptep;
/*
* Fix-up the refcount for the page-table pages as the early allocator
* was unable to access the hyp_vmemmap and so the buddy allocator has
* initialised the refcount to '1'.
*/
if (kvm_pte_valid(pte))
mm_ops->get_page(ptep);
return 0;
}
static int fix_host_ownership(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pkvm_pgtable.mm_ops,
.cb = fix_host_ownership_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
};
int i, ret;
@@ -233,6 +256,18 @@ static int finalize_host_mappings(void)
return 0;
}
static int fix_hyp_pgtable_refcnt(void)
{
struct kvm_pgtable_walker walker = {
.cb = fix_hyp_pgtable_refcnt_walker,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pkvm_pgtable.mm_ops,
};
return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits),
&walker);
}
static int select_iommu_ops(enum kvm_iommu_driver driver)
{
switch (driver) {
@@ -284,7 +319,15 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
}
ret = finalize_host_mappings();
ret = fix_host_ownership();
if (ret)
goto out;
ret = fix_hyp_pgtable_refcnt();
if (ret)
goto out;
ret = hyp_create_pcpu_fixmap();
if (ret)
goto out;

View File

@@ -26,8 +26,8 @@
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/pkvm.h>
/* Non-VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -205,6 +205,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
static const exit_handler_fn pvm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_HVC64] = kvm_handle_pvm_hvc64,
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
[ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
@@ -215,7 +216,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
@@ -234,9 +235,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM

View File

@@ -8,10 +8,10 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <hyp/adjust_pc.h>
#include <nvhe/fixed_config.h>
#include <nvhe/pkvm.h>
#include "../../sys_regs.h"
@@ -338,6 +338,17 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Cache maintenance by set/way operations are restricted. */
/* Debug and Trace Registers are restricted. */
RAZ_WI(SYS_DBGBVRn_EL1(0)),
RAZ_WI(SYS_DBGBCRn_EL1(0)),
RAZ_WI(SYS_DBGWVRn_EL1(0)),
RAZ_WI(SYS_DBGWCRn_EL1(0)),
RAZ_WI(SYS_MDSCR_EL1),
RAZ_WI(SYS_OSLAR_EL1),
RAZ_WI(SYS_OSLSR_EL1),
RAZ_WI(SYS_OSDLR_EL1),
/* Group 1 ID registers */
RAZ_WI(SYS_REVIDR_EL1),
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
@@ -419,8 +430,80 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Performance Monitoring Registers are restricted. */
};
/* A structure to track reset values for system registers in protected vcpus. */
struct sys_reg_desc_reset {
/* Index into sys_reg[]. */
int reg;
/* Reset function. */
void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc_reset *);
/* Reset value. */
u64 value;
};
static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r)
{
__vcpu_sys_reg(vcpu, r->reg) = read_sysreg(actlr_el1);
}
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r)
{
__vcpu_sys_reg(vcpu, r->reg) = read_sysreg(amair_el1);
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r)
{
__vcpu_sys_reg(vcpu, r->reg) = calculate_mpidr(vcpu);
}
static void reset_value(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r)
{
__vcpu_sys_reg(vcpu, r->reg) = r->value;
}
/* Specify the register's reset value. */
#define RESET_VAL(REG, RESET_VAL) { REG, reset_value, RESET_VAL }
/* Specify a function that calculates the register's reset value. */
#define RESET_FUNC(REG, RESET_FUNC) { REG, RESET_FUNC, 0 }
/*
* Checks that the sysreg table is unique and in-order.
* Architected system registers reset values for Protected VMs.
* Important: Must be sorted ascending by REG (index into sys_reg[])
*/
static const struct sys_reg_desc_reset pvm_sys_reg_reset_vals[] = {
RESET_FUNC(MPIDR_EL1, reset_mpidr),
RESET_VAL(SCTLR_EL1, 0x00C50078),
RESET_FUNC(ACTLR_EL1, reset_actlr),
RESET_VAL(CPACR_EL1, 0),
RESET_VAL(TCR_EL1, 0),
RESET_VAL(VBAR_EL1, 0),
RESET_VAL(CONTEXTIDR_EL1, 0),
RESET_FUNC(AMAIR_EL1, reset_amair_el1),
RESET_VAL(CNTKCTL_EL1, 0),
RESET_VAL(DISR_EL1, 0),
};
/*
* Sets system registers to reset value
*
* This function finds the right entry and sets the registers on the protected
* vcpu to their architecturally defined reset values.
*/
void kvm_reset_pvm_sys_regs(struct kvm_vcpu *vcpu)
{
unsigned long i;
for (i = 0; i < ARRAY_SIZE(pvm_sys_reg_reset_vals); i++) {
const struct sys_reg_desc_reset *r = &pvm_sys_reg_reset_vals[i];
r->reset(vcpu, r);
}
}
/*
* Checks that the sysreg tables are unique and in-order.
*
* Returns 0 if the table is consistent, or 1 otherwise.
*/
@@ -433,6 +516,11 @@ int kvm_check_pvm_sysreg_table(void)
return 1;
}
for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_reset_vals); i++) {
if (pvm_sys_reg_reset_vals[i-1].reg >= pvm_sys_reg_reset_vals[i].reg)
return 1;
}
return 0;
}

View File

@@ -17,6 +17,17 @@ struct tlb_inv_context {
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
WARN_ON(vcpu->arch.hw_mmu->vmid.vmid != mmu->vmid.vmid);
return;
}
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
@@ -45,6 +56,12 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
{
struct kvm_cpu_context *host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
if (host_ctxt->__hyp_running_vcpu)
return;
__load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {

View File

@@ -46,9 +46,6 @@
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
#define KVM_MAX_OWNER_ID 1
struct kvm_pgtable_walk_data {
struct kvm_pgtable *pgt;
struct kvm_pgtable_walker *walker;
@@ -167,11 +164,6 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
return pte;
}
static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
{
return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
}
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag)
@@ -565,7 +557,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
struct stage2_map_data {
u64 phys;
kvm_pte_t attr;
u8 owner_id;
u64 annotation;
kvm_pte_t *anchor;
kvm_pte_t *childp;
@@ -700,12 +692,12 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
@@ -732,7 +724,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (kvm_phys_is_valid(phys))
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
else
new = kvm_init_invalid_leaf_owner(data->owner_id);
new = data->annotation;
if (stage2_pte_is_counted(old)) {
/*
@@ -744,20 +736,28 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!stage2_pte_needs_update(old, new))
return -EAGAIN;
/*
* If we're only changing software bits, then we don't need to
* do anything else/
*/
if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
goto out_set_pte;
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
}
/* Perform CMOs before installation of the guest stage-2 PTE */
if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
granule);
granule);
if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
smp_store_release(ptep, new);
if (stage2_pte_is_counted(new))
mm_ops->get_page(ptep);
out_set_pte:
smp_store_release(ptep, new);
if (kvm_phys_is_valid(phys))
data->phys += granule;
return 0;
@@ -922,8 +922,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
return ret;
}
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, u8 owner_id)
int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc, kvm_pte_t annotation)
{
int ret;
struct stage2_map_data map_data = {
@@ -931,8 +931,8 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
.mmu = pgt->mmu,
.memcache = mc,
.mm_ops = pgt->mm_ops,
.owner_id = owner_id,
.force_pte = true,
.annotation = annotation,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -942,7 +942,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
.arg = &map_data,
};
if (owner_id > KVM_MAX_OWNER_ID)
if (annotation & PTE_VALID)
return -EINVAL;
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
@@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
if (need_flush) {
kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
dcache_clean_inval_poc((unsigned long)pte_follow,
(unsigned long)pte_follow +
kvm_granule_size(level));
}
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
if (!stage2_pte_cacheable(pgt, pte))
return 0;
pte_follow = kvm_pte_follow(pte, mm_ops);
dcache_clean_inval_poc((unsigned long)pte_follow,
(unsigned long)pte_follow +
kvm_granule_size(level));
if (mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
kvm_granule_size(level));
return 0;
}
@@ -1206,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
return 0;
}
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)

View File

@@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
u64 __vgic_v3_read_vmcr(void)
static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
void __vgic_v3_write_vmcr(u32 vmcr)
static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
__vgic_v3_save_aprs(cpu_if);
if (cpu_if->vgic_sre)
cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
}
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
/*
* If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
* is dependent on ICC_SRE_EL1.SRE, and we have to perform the
* VMCR_EL2 save/restore in the world switch.
*/
if (cpu_if->vgic_sre)
__vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
__vgic_v3_restore_aprs(cpu_if);
}
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */

View File

@@ -58,6 +58,24 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
static int kvm_vcpu_exit_hcall(struct kvm_vcpu *vcpu, u32 nr, u32 nr_args)
{
u64 mask = vcpu->kvm->arch.hypercall_exit_enabled;
u32 i;
if (nr_args > 6 || !(mask & BIT(nr)))
return -EINVAL;
vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
vcpu->run->hypercall.nr = nr;
for (i = 0; i < nr_args; ++i)
vcpu->run->hypercall.args[i] = vcpu_get_reg(vcpu, i + 1);
vcpu->run->hypercall.longmode = !vcpu_mode_is_32bit(vcpu);
return 0;
}
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
u32 func_id = smccc_get_function(vcpu);
@@ -133,6 +151,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
kvm_ptp_get_time(vcpu, val);
break;
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
if (!kvm_vcpu_exit_hcall(vcpu, ARM_SMCCC_KVM_FUNC_MEM_SHARE, 3))
return 0;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
if (!kvm_vcpu_exit_hcall(vcpu, ARM_SMCCC_KVM_FUNC_MEM_UNSHARE, 3))
return 0;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
if (kvm_vm_is_protected(vcpu->kvm) && !topup_hyp_memcache(vcpu))
val[0] = SMCCC_RET_SUCCESS;
break;
case ARM_SMCCC_TRNG_VERSION:
case ARM_SMCCC_TRNG_FEATURES:
case ARM_SMCCC_TRNG_GET_UUID:

View File

@@ -134,7 +134,7 @@ static int s2mpu_probe_v9(struct platform_device *pdev, void __iomem *kaddr,
return 0;
}
/**
/*
* Parse interrupt information from DT and if found, register IRQ handler.
* This is considered optional and will not fail even if the initialization is
* unsuccessful. In that case the IRQ will remain masked.

View File

@@ -135,7 +135,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
run->arm_nisv.fault_ipa = fault_ipa;

View File

@@ -190,6 +190,22 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
__unmap_stage2_range(mmu, start, size, true);
}
static void pkvm_stage2_flush(struct kvm *kvm)
{
struct kvm_pinned_page *ppage;
/*
* Contrary to stage2_apply_range(), we don't need to check
* whether the VM is being torn down, as this is always called
* from a vcpu thread, and the list is only ever freed on VM
* destroy (which only occurs when all vcpu are gone).
*/
list_for_each_entry(ppage, &kvm->arch.pkvm.pinned_pages, link) {
__clean_dcache_guest_page(page_address(ppage->page), PAGE_SIZE);
cond_resched_lock(&kvm->mmu_lock);
}
}
static void stage2_flush_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
@@ -215,9 +231,13 @@ static void stage2_flush_vm(struct kvm *kvm)
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
stage2_flush_memslot(kvm, memslot);
if (!is_protected_kvm_enabled()) {
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots)
stage2_flush_memslot(kvm, memslot);
} else if (!kvm_vm_is_protected(kvm)) {
pkvm_stage2_flush(kvm);
}
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
@@ -618,15 +638,44 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
* kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
* @kvm: The pointer to the KVM structure
* @mmu: The pointer to the s2 MMU structure
* @type: The machine type of the virtual machine
*
* Allocates only the stage-2 HW PGD level table(s).
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
{
u32 kvm_ipa_limit = get_kvm_ipa_limit();
int cpu, err;
struct kvm_pgtable *pgt;
u64 mmfr0, mmfr1;
u32 phys_shift;
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
if (is_protected_kvm_enabled()) {
phys_shift = kvm_ipa_limit;
} else if (phys_shift) {
if (phys_shift > kvm_ipa_limit ||
phys_shift < ARM64_MIN_PARANGE_BITS)
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
if (phys_shift > kvm_ipa_limit) {
pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
current->comm);
return -EINVAL;
}
}
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
INIT_LIST_HEAD(&kvm->arch.pkvm.pinned_pages);
mmu->arch = &kvm->arch;
if (is_protected_kvm_enabled())
return 0;
if (mmu->pgt != NULL) {
kvm_err("kvm_arch already initialized?\n");
@@ -736,6 +785,9 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
if (is_protected_kvm_enabled())
return;
spin_lock(&kvm->mmu_lock);
pgt = mmu->pgt;
if (pgt) {
@@ -751,6 +803,34 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
}
}
static void hyp_mc_free_fn(void *addr, void *unused)
{
free_page((unsigned long)addr);
}
static void *hyp_mc_alloc_fn(void *unused)
{
return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
}
void free_hyp_memcache(struct kvm_hyp_memcache *mc)
{
if (is_protected_kvm_enabled())
__free_hyp_memcache(mc, hyp_mc_free_fn,
kvm_host_va, NULL);
}
int topup_hyp_memcache(struct kvm_vcpu *vcpu)
{
if (!is_protected_kvm_enabled())
return 0;
return __topup_hyp_memcache(&vcpu->arch.pkvm_memcache,
kvm_mmu_cache_min_pages(vcpu->kvm),
hyp_mc_alloc_fn,
kvm_host_pa, NULL);
}
/**
* kvm_phys_addr_ioremap - map a device range to guest IPA
*
@@ -1063,6 +1143,88 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
return 0;
}
static int pkvm_host_donate_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu)
{
struct arm_smccc_res res;
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_host_donate_guest),
pfn, gfn, vcpu, &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
/*
* Getting -EPERM at this point implies that the pfn has already been
* donated. This should only ever happen when two vCPUs faulted on the
* same page, and the current one lost the race to do the donation.
*/
return (res.a1 == -EPERM) ? -EAGAIN : res.a1;
}
static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long hva)
{
struct mm_struct *mm = current->mm;
unsigned int flags = FOLL_FORCE |
FOLL_HWPOISON |
FOLL_LONGTERM |
FOLL_WRITE;
struct kvm_pinned_page *ppage;
struct kvm *kvm = vcpu->kvm;
struct page *page;
u64 pfn;
int ret;
ret = topup_hyp_memcache(vcpu);
if (ret)
return -ENOMEM;
ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
if (!ppage)
return -ENOMEM;
ret = account_locked_vm(mm, 1, true);
if (ret)
goto free_ppage;
mmap_read_lock(mm);
ret = pin_user_pages(hva, 1, flags, &page, NULL);
mmap_read_unlock(mm);
if (ret == -EHWPOISON) {
kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
ret = 0;
goto dec_account;
} else if (ret != 1) {
ret = -EFAULT;
goto dec_account;
}
spin_lock(&kvm->mmu_lock);
pfn = page_to_pfn(page);
ret = pkvm_host_donate_guest(pfn, fault_ipa >> PAGE_SHIFT, vcpu);
if (ret) {
if (ret == -EAGAIN)
ret = 0;
goto unpin;
}
ppage->page = page;
INIT_LIST_HEAD(&ppage->link);
list_add(&ppage->link, &kvm->arch.pkvm.pinned_pages);
spin_unlock(&kvm->mmu_lock);
return 0;
unpin:
spin_unlock(&kvm->mmu_lock);
unpin_user_pages(&page, 1);
dec_account:
account_locked_vm(mm, 1, false);
free_ppage:
kfree(ppage);
return ret;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -1393,7 +1555,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* faulting VA. This is always 12 bits, irrespective
* of the page size.
*/
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & FAR_MASK;
ret = io_mem_abort(vcpu, fault_ipa);
goto out_unlock;
}
@@ -1407,7 +1569,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
goto out_unlock;
}
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (is_protected_kvm_enabled())
ret = pkvm_mem_abort(vcpu, fault_ipa, hva);
else
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out:

View File

@@ -4,14 +4,23 @@
* Author: Quentin Perret <qperret@google.com>
*/
#include <linux/io.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <asm/kvm_pkvm.h>
#include "hyp_constants.h"
static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
@@ -53,7 +62,7 @@ static int __init register_memblock_regions(void)
void __init kvm_hyp_reserve(void)
{
u64 nr_pages, prev, hyp_mem_pages = 0;
u64 hyp_mem_pages = 0;
int ret;
if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
@@ -71,21 +80,8 @@ void __init kvm_hyp_reserve(void)
hyp_mem_pages += hyp_s1_pgtable_pages();
hyp_mem_pages += host_s2_pgtable_pages();
/*
* The hyp_vmemmap needs to be backed by pages, but these pages
* themselves need to be present in the vmemmap, so compute the number
* of pages needed by looking for a fixed point.
*/
nr_pages = 0;
do {
prev = nr_pages;
nr_pages = hyp_mem_pages + prev;
nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
PAGE_SIZE);
nr_pages += __hyp_pgtable_max_pages(nr_pages);
} while (nr_pages != prev);
hyp_mem_pages += nr_pages;
hyp_mem_pages += hyp_shadow_table_pages(KVM_SHADOW_VM_SIZE);
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
/*
* Try to allocate a PMD-aligned region to reduce TLB pressure once
@@ -107,3 +103,215 @@ void __init kvm_hyp_reserve(void)
kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
hyp_mem_base);
}
/*
* Updates the state of the host's version of the vcpu state.
*/
static void update_vcpu_state(struct kvm_vcpu *vcpu, int shadow_handle)
{
vcpu->arch.pkvm.shadow_handle = shadow_handle;
}
/*
* Allocates and donates memory for EL2 shadow structs.
*
* Allocates space for the shadow state, which includes the shadow vm as well as
* the shadow vcpu states.
*
* Stores an opaque handler in the kvm struct for future reference.
*
* Return 0 on success, negative error code on failure.
*/
static int __create_el2_shadow(struct kvm *kvm)
{
struct kvm_vcpu *vcpu, **vcpu_array;
size_t pgd_sz, shadow_sz;
void *pgd, *shadow_addr;
unsigned long idx;
int shadow_handle;
int ret, i;
if (kvm->created_vcpus < 1)
return -EINVAL;
pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
* page granularity. So, use alloc_pages_exact() to get individual
* refcounts.
*/
pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
if (!pgd)
return -ENOMEM;
/* Allocate memory to donate to hyp for the kvm and vcpu state. */
shadow_sz = PAGE_ALIGN(KVM_SHADOW_VM_SIZE +
SHADOW_VCPU_STATE_SIZE * kvm->created_vcpus);
shadow_addr = alloc_pages_exact(shadow_sz, GFP_KERNEL_ACCOUNT);
if (!shadow_addr) {
ret = -ENOMEM;
goto free_pgd;
}
/* Stash the vcpu pointers into the PGD */
BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64)));
vcpu_array = pgd;
kvm_for_each_vcpu(idx, vcpu, kvm)
vcpu_array[idx] = vcpu;
/* Donate the shadow memory to hyp and let hyp initialize it. */
ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz,
pgd);
if (ret < 0)
goto free_shadow;
shadow_handle = ret;
/* Store the shadow handle given by hyp for future call reference. */
kvm->arch.pkvm.shadow_handle = shadow_handle;
/* Adjust host's vcpu state as it doesn't control it anymore. */
for (i = 0; i < kvm->created_vcpus; i++)
update_vcpu_state(kvm->vcpus[i], shadow_handle);
return 0;
free_shadow:
free_pages_exact(shadow_addr, shadow_sz);
free_pgd:
free_pages_exact(pgd, pgd_sz);
return ret;
}
int create_el2_shadow(struct kvm *kvm)
{
int ret = 0;
mutex_lock(&kvm->arch.pkvm.shadow_lock);
if (!kvm->arch.pkvm.shadow_handle)
ret = __create_el2_shadow(kvm);
mutex_unlock(&kvm->arch.pkvm.shadow_lock);
return ret;
}
static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
const char *reason)
{
phys_addr_t end = rmem->base + rmem->size;
kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
&rmem->base, &end, reason);
return -EINVAL;
}
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
unsigned long node = rmem->fdt_node;
if (pkvm_firmware_mem)
return pkvm_firmware_rmem_err(rmem, "duplicate reservation");
if (!of_get_flat_dt_prop(node, "no-map", NULL))
return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");
if (of_get_flat_dt_prop(node, "reusable", NULL))
return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");
if (!PAGE_ALIGNED(rmem->base))
return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");
if (!PAGE_ALIGNED(rmem->size))
return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");
*pvmfw_size = rmem->size;
*pvmfw_base = rmem->base;
pkvm_firmware_mem = rmem;
return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
pkvm_firmware_rmem_init);
static int __init pkvm_firmware_rmem_clear(void)
{
void *addr;
phys_addr_t size;
if (likely(!pkvm_firmware_mem) || is_protected_kvm_enabled())
return 0;
kvm_info("Clearing unused pKVM firmware memory\n");
size = pkvm_firmware_mem->size;
addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
if (!addr)
return -EINVAL;
memset(addr, 0, size);
dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
memunmap(addr);
return 0;
}
device_initcall_sync(pkvm_firmware_rmem_clear);
static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
int ret = 0;
if (!pkvm_firmware_mem)
return -EINVAL;
mutex_lock(&kvm->arch.pkvm.shadow_lock);
if (kvm->arch.pkvm.shadow_handle) {
ret = -EBUSY;
goto out_unlock;
}
kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
mutex_unlock(&kvm->arch.pkvm.shadow_lock);
return ret;
}
static int pkvm_vm_ioctl_info(struct kvm *kvm,
struct kvm_protected_vm_info __user *info)
{
struct kvm_protected_vm_info kinfo = {
.firmware_size = pkvm_firmware_mem ?
pkvm_firmware_mem->size :
0,
};
return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}
int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap)
{
if (cap->args[1] || cap->args[2] || cap->args[3])
return -EINVAL;
switch (cap->flags) {
case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
default:
return -EINVAL;
}
return 0;
}
int kvm_init_pvm(struct kvm *kvm, unsigned long type)
{
mutex_init(&kvm->arch.pkvm.shadow_lock);
kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
return 0;
if (!is_protected_kvm_enabled())
return -EINVAL;
kvm->arch.pkvm.enabled = true;
return 0;
}

View File

@@ -21,16 +21,6 @@
* as described in ARM document number ARM DEN 0022A.
*/
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
if (affinity_level <= 3)
return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
return 0;
}
static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
{
/*
@@ -59,12 +49,6 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
kvm_vcpu_kick(vcpu);
}
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
unsigned long affinity)
{
return !(affinity & ~MPIDR_HWID_BITMASK);
}
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct vcpu_reset_state *reset_state;
@@ -195,18 +179,6 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
}
static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
{
int i;
/*
* Zero the input registers' upper 32 bits. They will be fully
* zeroed on exit, so we're fine changing them in place.
*/
for (i = 1; i < 4; i++)
vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i)));
}
static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
{
switch(fn) {

View File

@@ -32,15 +32,6 @@
/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;
/*
* ARMv8 Reset Values
*/
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
unsigned int kvm_sve_max_vl;
int kvm_arm_init_sve(void)
@@ -118,7 +109,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
kfree(buf);
return ret;
}
vcpu->arch.sve_state = buf;
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
return 0;
@@ -165,22 +156,6 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}
static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
/*
* For now make sure that both address/generic pointer authentication
* features are requested by the userspace together and the system
* supports these capabilities.
*/
if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
!test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
!system_has_full_ptr_auth())
return -EINVAL;
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
return 0;
}
static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *tmp;
@@ -370,32 +345,3 @@ int kvm_set_ipa_limit(void)
return 0;
}
/*
 * Configure the stage-2 translation parameters (VTCR) for a new VM.
 *
 * @type carries the requested IPA size in its low bits (see
 * KVM_VM_TYPE_ARM_IPA_SIZE); any other bit set is rejected.
 *
 * Returns 0 on success, -EINVAL for an out-of-range or unsupported
 * request.
 */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	u64 mmfr0, mmfr1;
	u32 phys_shift;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (phys_shift) {
		/* Explicit request: must lie within the supported range. */
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < ARM64_MIN_PARANGE_BITS)
			return -EINVAL;
	} else {
		/*
		 * Legacy VMMs pass 0; fall back to the historical default,
		 * but refuse it if the hardware cannot actually provide it.
		 */
		phys_shift = KVM_PHYS_SHIFT;
		if (phys_shift > kvm_ipa_limit) {
			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
				     current->comm);
			return -EINVAL;
		}
	}

	/* Derive VTCR from the sanitised memory-model feature registers. */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);

	return 0;
}

View File

@@ -64,26 +64,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
/*
 * Read a guest system register by encoded index.
 *
 * If the vcpu's sysregs are currently resident on the physical CPU, read
 * the live hardware copy; otherwise fall back to the in-memory shadow.
 * The 0x8badf00d pattern is only a poison default and is overwritten on
 * every successful read path.
 */
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
	u64 val = 0x8badf00d8badf00d;

	if (vcpu->arch.sysregs_loaded_on_cpu &&
	    __vcpu_read_sys_reg_from_cpu(reg, &val))
		return val;

	return __vcpu_sys_reg(vcpu, reg);
}
/*
 * Write a guest system register by encoded index: update the live
 * hardware copy when the vcpu's sysregs are resident on this CPU,
 * otherwise update the in-memory shadow.
 */
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
	if (vcpu->arch.sysregs_loaded_on_cpu &&
	    __vcpu_write_sys_reg_to_cpu(val, reg))
		return;

	__vcpu_sys_reg(vcpu, reg) = val;
}
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -575,19 +555,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mpidr;
/*
* Map the vcpu_id into the first three affinity level fields of
* the MPIDR. We limit the number of VCPUs in level 0 due to a
* limitation to 16 CPUs in that level in the ICC_SGIxR registers
* of the GICv3 to be able to address each CPU directly when
* sending IPIs.
*/
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
vcpu_write_sys_reg(vcpu, calculate_mpidr(vcpu), MPIDR_EL1);
}
static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,

View File

@@ -183,6 +183,25 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
static inline u64 calculate_mpidr(const struct kvm_vcpu *vcpu)
{
u64 mpidr;
/*
* Map the vcpu_id into the first three affinity level fields of
* the MPIDR. We limit the number of VCPUs in level 0 due to a
* limitation to 16 CPUs in that level in the ICC_SGIxR registers
* of the GICv3 to be able to address each CPU directly when
* sending IPIs.
*/
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
mpidr |= (1ULL << 31);
return mpidr;
}
const struct sys_reg_desc *find_reg_by_id(u64 id,
struct sys_reg_params *params,
const struct sys_reg_desc table[],

View File

@@ -470,17 +470,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.vctrl_base + GICH_APR);
}
void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v2_put(struct kvm_vcpu *vcpu, bool blocking)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
}
void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
vgic_v2_vmcr_sync(vcpu);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}

View File

@@ -707,15 +707,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
/*
* If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
* is dependent on ICC_SRE_EL1.SRE, and we have to perform the
* VMCR_EL2 save/restore in the world switch.
*/
if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -723,23 +716,14 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
WARN_ON(vgic_v4_load(vcpu));
}
void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu, bool blocking)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
if (likely(cpu_if->vgic_sre))
cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
}
WARN_ON(vgic_v4_put(vcpu, blocking));
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
WARN_ON(vgic_v4_put(vcpu, false));
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_deactivate_traps(cpu_if);

View File

@@ -931,26 +931,15 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu)
vgic_v3_load(vcpu);
}
void kvm_vgic_put(struct kvm_vcpu *vcpu)
void kvm_vgic_put(struct kvm_vcpu *vcpu, bool blocking)
{
if (unlikely(!vgic_initialized(vcpu->kvm)))
return;
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_put(vcpu);
vgic_v2_put(vcpu, blocking);
else
vgic_v3_put(vcpu);
}
void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
{
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
return;
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_vmcr_sync(vcpu);
else
vgic_v3_vmcr_sync(vcpu);
vgic_v3_put(vcpu, blocking);
}
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)

View File

@@ -196,8 +196,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu, bool blocking);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -227,8 +226,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu);
void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu, bool blocking);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
ioremap.o mem_encrypt.o mmap.o pgd.o mmu.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o

View File

@@ -88,7 +88,7 @@ SYM_FUNC_END(caches_clean_inval_user_pou)
* - start - virtual start address of region
* - end - virtual end address of region
*/
SYM_FUNC_START(icache_inval_pou)
SYM_FUNC_START_PI(icache_inval_pou)
alternative_if ARM64_HAS_CACHE_DIC
isb
ret
@@ -96,7 +96,7 @@ alternative_else_nop_endif
invalidate_icache_by_line x0, x1, x2, x3
ret
SYM_FUNC_END(icache_inval_pou)
SYM_FUNC_END_PI(icache_inval_pou)
/*
* dcache_clean_inval_poc(start, end)

View File

@@ -9,13 +9,175 @@
* Copyright (C) 2012 ARM Ltd.
*/
#define pr_fmt(fmt) "ioremap: " fmt
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/arm-smccc.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/hypervisor.h>
/* Per-pfn reference count for MMIO pages registered with the hypervisor. */
struct ioremap_guard_ref {
	refcount_t	count;
};

/* Flipped on once the hypervisor has accepted MMIO-guard enrollment. */
static DEFINE_STATIC_KEY_FALSE(ioremap_guard_key);
/* pfn -> struct ioremap_guard_ref for every currently guarded page. */
static DEFINE_XARRAY(ioremap_guard_array);
/* Serializes lookup/insert/erase across the map and unmap hooks. */
static DEFINE_MUTEX(ioremap_guard_lock);

static bool ioremap_guard;

/* "ioremap_guard" on the kernel command line requests the MMIO guard. */
static int __init ioremap_guard_setup(char *str)
{
	ioremap_guard = true;

	return 0;
}
early_param("ioremap_guard", ioremap_guard_setup);
/*
 * Register the earlycon fixmap page with the MMIO guard. Earlycon is
 * mapped long before the guard can be enrolled, so its page would
 * otherwise never be advertised to the hypervisor.
 */
static void fixup_fixmap(void)
{
	pte_t *ptep = __get_fixmap_pte(FIX_EARLYCON_MEM_BASE);

	/* No valid earlycon mapping -> nothing to register. */
	if (!ptep)
		return;

	ioremap_phys_range_hook(__pte_to_phys(*ptep), PAGE_SIZE,
				__pgprot(pte_val(*ptep) & PTE_ATTRINDX_MASK));
}
/*
 * Probe for and enroll in the KVM MMIO guard.
 *
 * Requires the "ioremap_guard" command-line option, all four MMIO-guard
 * hypercalls, and a hypervisor granule equal to PAGE_SIZE. On successful
 * enrollment the static key is enabled so the ioremap/iounmap hooks
 * start registering pages, and the already-mapped earlycon fixmap page
 * is registered retroactively.
 */
void kvm_init_ioremap_services(void)
{
	struct arm_smccc_res res;

	if (!ioremap_guard)
		return;

	/* We need all the functions to be implemented */
	if (!kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO) ||
	    !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL) ||
	    !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP) ||
	    !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP))
		return;

	/* Only a guard granule matching the kernel page size is supported. */
	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID,
			     0, 0, 0, &res);
	if (res.a0 != PAGE_SIZE)
		return;

	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID,
			     &res);
	if (res.a0 == SMCCC_RET_SUCCESS) {
		static_branch_enable(&ioremap_guard_key);
		fixup_fixmap();
		pr_info("Using KVM MMIO guard for ioremap\n");
	} else {
		pr_warn("KVM MMIO guard registration failed (%ld)\n", res.a0);
	}
}
/*
 * ioremap() hook: register the physical range
 * [phys_addr, phys_addr + size) with the KVM MMIO guard, one page at a
 * time. Pages are refcounted per pfn in ioremap_guard_array so that
 * overlapping ioremaps of the same device page only register it once.
 */
void ioremap_phys_range_hook(phys_addr_t phys_addr, size_t size, pgprot_t prot)
{
	/* No-op until the MMIO guard has been successfully enrolled. */
	if (!static_branch_unlikely(&ioremap_guard_key))
		return;

	/* Ordinary RAM is not guarded; only true MMIO pfns are tracked. */
	if (pfn_valid(__phys_to_pfn(phys_addr)))
		return;

	mutex_lock(&ioremap_guard_lock);

	while (size) {
		u64 pfn = phys_addr >> PAGE_SHIFT;
		struct ioremap_guard_ref *ref;
		struct arm_smccc_res res;

		/* Already registered: just take another reference. */
		ref = xa_load(&ioremap_guard_array, pfn);
		if (ref) {
			refcount_inc(&ref->count);
			goto next;
		}

		/*
		 * It is acceptable for the allocation to fail, specially
		 * if trying to ioremap something very early on, like with
		 * earlycon, which happens long before kmem_cache_init.
		 * This page will be permanently accessible, similar to a
		 * saturated refcount.
		 */
		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (ref) {
			refcount_set(&ref->count, 1);
			if (xa_err(xa_store(&ioremap_guard_array, pfn, ref,
					    GFP_KERNEL))) {
				kfree(ref);
				ref = NULL;
			}
		}

		/*
		 * NOTE(review): @prot (pgprot_t) is passed straight into
		 * the SMCCC call; this relies on pgprot_t degrading to a
		 * scalar here -- confirm whether pgprot_val(prot) was
		 * intended.
		 */
		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID,
				  phys_addr, prot, &res);
		if (res.a0 != SMCCC_RET_SUCCESS) {
			pr_warn_ratelimited("Failed to register %llx\n",
					    phys_addr);
			/* Drop the tracking entry; the page stays unguarded. */
			xa_erase(&ioremap_guard_array, pfn);
			kfree(ref);
			goto out;
		}

	next:
		size -= PAGE_SIZE;
		phys_addr += PAGE_SIZE;
	}
out:
	mutex_unlock(&ioremap_guard_lock);
}
/*
 * iounmap() hook: drop one reference per page of the range and, once a
 * page's refcount reaches zero, ask the hypervisor to retire its MMIO
 * guard mapping.
 */
void iounmap_phys_range_hook(phys_addr_t phys_addr, size_t size)
{
	if (!static_branch_unlikely(&ioremap_guard_key))
		return;

	/* Both the address and the size must be page-granular. */
	VM_BUG_ON(phys_addr & ~PAGE_MASK || size & ~PAGE_MASK);

	mutex_lock(&ioremap_guard_lock);

	while (size) {
		u64 pfn = phys_addr >> PAGE_SHIFT;
		struct ioremap_guard_ref *ref;
		struct arm_smccc_res res;

		ref = xa_load(&ioremap_guard_array, pfn);
		if (!ref) {
			/*
			 * Happens when the kzalloc() in the map hook
			 * failed: such pages stay registered forever.
			 */
			pr_warn_ratelimited("%llx not tracked, left mapped\n",
					    phys_addr);
			goto next;
		}

		/* Other ioremaps of this page are still live. */
		if (!refcount_dec_and_test(&ref->count))
			goto next;

		xa_erase(&ioremap_guard_array, pfn);
		kfree(ref);

		arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID,
				  phys_addr, &res);
		if (res.a0 != SMCCC_RET_SUCCESS) {
			/*
			 * NOTE(review): bailing out here leaves the
			 * remaining pages' references undropped --
			 * confirm this is intentional.
			 */
			pr_warn_ratelimited("Failed to unregister %llx\n",
					    phys_addr);
			goto out;
		}

	next:
		size -= PAGE_SIZE;
		phys_addr += PAGE_SIZE;
	}
out:
	mutex_unlock(&ioremap_guard_lock);
}
static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
pgprot_t prot, void *caller)

102
arch/arm64/mm/mem_encrypt.c Normal file
View File

@@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Implementation of the memory encryption/decryption API.
*
* Amusingly, no crypto is actually performed. Rather, we call into the
* hypervisor component of KVM to expose pages selectively to the host
* for virtio "DMA" operations. In other words, "encrypted" pages are
* not accessible to the host, whereas "decrypted" pages are.
*
* Author: Will Deacon <will@kernel.org>
*/
#include <linux/arm-smccc.h>
#include <linux/mem_encrypt.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/set_memory.h>
#include <linux/types.h>
#include <asm/hypervisor.h>
static unsigned long memshare_granule_sz;
/* True once the hypervisor has advertised a usable share/unshare granule. */
bool mem_encrypt_active(void)
{
	return memshare_granule_sz != 0;
}
EXPORT_SYMBOL(mem_encrypt_active);
/*
 * Detect the KVM memory-sharing hypercalls and record the share granule.
 *
 * All three hypercalls must be advertised, and the reported granule must
 * not exceed PAGE_SIZE; otherwise memshare_granule_sz stays 0 and the
 * facility remains disabled.
 */
void kvm_init_memshare_services(void)
{
	int i;
	struct arm_smccc_res res;
	const u32 funcs[] = {
		ARM_SMCCC_KVM_FUNC_HYP_MEMINFO,
		ARM_SMCCC_KVM_FUNC_MEM_SHARE,
		ARM_SMCCC_KVM_FUNC_MEM_UNSHARE,
	};

	for (i = 0; i < ARRAY_SIZE(funcs); ++i) {
		if (!kvm_arm_hyp_service_available(funcs[i]))
			return;
	}

	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID,
			     0, 0, 0, &res);
	if (res.a0 > PAGE_SIZE) /* Includes error codes */
		return;

	memshare_granule_sz = res.a0;
}
/*
 * Invoke @func_id (share or unshare) for every hypervisor granule
 * backing the single kernel page at @phys. Assumes memshare_granule_sz
 * is non-zero (callers check via the set_memory_* entry points).
 *
 * Returns 0 on success, -EPERM as soon as one call is refused.
 */
static int arm_smccc_share_unshare_page(u32 func_id, phys_addr_t phys)
{
	phys_addr_t end = phys + PAGE_SIZE;

	while (phys < end) {
		struct arm_smccc_res res;

		arm_smccc_1_1_invoke(func_id, phys, 0, 0, &res);
		if (res.a0 != SMCCC_RET_SUCCESS)
			return -EPERM;

		phys += memshare_granule_sz;
	}

	return 0;
}
/*
 * Apply @func_id (share or unshare) to @numpages pages starting at the
 * kernel virtual address @start. Returns 0, or the first error; pages
 * already processed before a failure are left in their new state.
 */
static int set_memory_xcrypted(u32 func_id, unsigned long start, int numpages)
{
	void *addr = (void *)start, *end = addr + numpages * PAGE_SIZE;

	while (addr < end) {
		int err;

		err = arm_smccc_share_unshare_page(func_id, virt_to_phys(addr));
		if (err)
			return err;

		addr += PAGE_SIZE;
	}

	return 0;
}
/*
 * "Encrypt" @numpages pages at @addr by unsharing them from the host.
 * A no-op (returning 0) when memory sharing is unavailable; warns and
 * bails out on a misaligned address.
 */
int set_memory_encrypted(unsigned long addr, int numpages)
{
	if (!memshare_granule_sz)
		return 0;

	if (WARN_ON(!PAGE_ALIGNED(addr)))
		return 0;

	return set_memory_xcrypted(ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID,
				   addr, numpages);
}
/*
 * "Decrypt" @numpages pages at @addr by sharing them with the host.
 * A no-op (returning 0) when memory sharing is unavailable; warns and
 * bails out on a misaligned address.
 */
int set_memory_decrypted(unsigned long addr, int numpages)
{
	if (!memshare_granule_sz)
		return 0;

	if (WARN_ON(!PAGE_ALIGNED(addr)))
		return 0;

	return set_memory_xcrypted(ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID,
				   addr, numpages);
}

View File

@@ -1286,6 +1286,21 @@ void __set_fixmap(enum fixed_addresses idx,
}
}
/*
 * Return the PTE backing fixmap slot @idx, or NULL when the slot holds
 * no valid mapping. BUG()s on an out-of-range index.
 */
pte_t *__get_fixmap_pte(enum fixed_addresses idx)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = fixmap_pte(addr);

	if (!pte_valid(*ptep))
		return NULL;

	return ptep;
}
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);

View File

@@ -9,6 +9,8 @@
#include <asm/hypervisor.h>
void __weak kvm_arm_init_hyp_services(void) {}
static DECLARE_BITMAP(__kvm_arm_hyp_services, ARM_SMCCC_KVM_NUM_FUNCS) __ro_after_init = { };
void __init kvm_init_hyp_services(void)
@@ -38,6 +40,8 @@ void __init kvm_init_hyp_services(void)
pr_info("hypervisor services detected (0x%08lx 0x%08lx 0x%08lx 0x%08lx)\n",
res.a3, res.a2, res.a1, res.a0);
kvm_arm_init_hyp_services();
}
bool kvm_arm_hyp_service_available(u32 func_id)

View File

@@ -19,6 +19,7 @@
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/mem_encrypt.h>
#include <linux/mm.h>
#include <linux/msi.h>
#include <linux/of.h>

View File

@@ -485,6 +485,18 @@ config HISI_HIKEY_USB
switching between the dual-role USB-C port and the USB-A host ports
using only one USB controller.
config OPEN_DICE
tristate "Open Profile for DICE driver"
depends on OF_RESERVED_MEM
help
This driver exposes a DICE reserved memory region to userspace via
a character device. The memory region contains Compound Device
Identifiers (CDIs) generated by firmware as an output of DICE
measured boot flow. Userspace can use CDIs for remote attestation
and sealing.
If unsure, say N.
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"

View File

@@ -60,3 +60,4 @@ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o
obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o
obj-$(CONFIG_OPEN_DICE) += open-dice.o

188
drivers/misc/open-dice.c Normal file
View File

@@ -0,0 +1,188 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 - Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*
* Driver for Open Profile for DICE.
*
* This driver takes ownership of a reserved memory region containing data
* generated by the Open Profile for DICE measured boot protocol. The memory
* contents are not interpreted by the kernel but can be mapped into a userspace
* process via a misc device. Userspace can also request a wipe of the memory.
*
* Userspace can access the data with (w/o error handling):
*
* fd = open("/dev/open-dice0", O_RDWR);
* read(fd, &size, sizeof(unsigned long));
* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
* write(fd, NULL, 0); // wipe
* close(fd);
*/
#include <linux/io.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>
#define DRIVER_NAME "open-dice"
struct open_dice_drvdata {
	spinlock_t lock;		/* serializes wipes of the region */
	char name[16];			/* "open-diceN" misc device name */
	struct reserved_mem *rmem;	/* the DICE reserved-memory region */
	struct miscdevice misc;
};

/* The misc core sets filp->private_data to &drvdata->misc on open(). */
static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp)
{
	return container_of(filp->private_data, struct open_dice_drvdata, misc);
}
/*
 * Zero the entire reserved memory region.
 *
 * The region is transiently mapped write-combine, memset to zero, and
 * unmapped again so no permanent kernel mapping is kept around.
 *
 * NOTE(review): devm_memremap()/devm_memunmap() can sleep, yet they are
 * called here with drvdata->lock (a spinlock) held -- this looks like a
 * sleep-in-atomic bug; a mutex would be the safer primitive. Confirm
 * against the upstream driver.
 */
static int open_dice_wipe(struct open_dice_drvdata *drvdata)
{
	void *kaddr;

	spin_lock(&drvdata->lock);
	kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base,
			      drvdata->rmem->size, MEMREMAP_WC);
	if (IS_ERR(kaddr)) {
		spin_unlock(&drvdata->lock);
		return PTR_ERR(kaddr);
	}

	memset(kaddr, 0, drvdata->rmem->size);
	devm_memunmap(drvdata->misc.this_device, kaddr);
	spin_unlock(&drvdata->lock);
	return 0;
}
/*
 * read() reports the size of the reserved region as an unsigned long so
 * userspace knows how much to mmap(). simple_read_from_buffer() honours
 * partial reads and the file offset.
 */
static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len,
			      loff_t *off)
{
	struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);
	unsigned long region_size = drvdata->rmem->size;

	return simple_read_from_buffer(ptr, len, off, &region_size,
				       sizeof(region_size));
}
/*
 * Any write, regardless of its content, triggers a wipe of the reserved
 * region; the user-supplied buffer is never dereferenced.
 */
static ssize_t open_dice_write(struct file *filp, const char __user *ptr,
			       size_t len, loff_t *off)
{
	int err = open_dice_wipe(to_open_dice_drvdata(filp));

	if (err)
		return -EIO;

	/* Report the whole buffer as consumed. */
	return len;
}
/*
 * Map the reserved region into user address space.
 *
 * Writable shared mappings are refused so userspace cannot tamper with
 * the DICE data; the only sanctioned modification is the wipe via
 * write().
 */
static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);

	/* Do not allow userspace to modify the underlying data. */
	if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))
		return -EPERM;

	/* Create write-combine mapping so all clients observe a wipe. */
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP;
	return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size);
}
/* Char-device ops: size via read(), wipe via write(), data via mmap(). */
static const struct file_operations open_dice_fops = {
	.owner = THIS_MODULE,
	.read = open_dice_read,
	.write = open_dice_write,
	.mmap = open_dice_mmap,
};
/*
 * Bind to a "google,open-dice" reserved-memory node and expose it as a
 * dynamically numbered /dev/open-diceN misc device.
 *
 * The region must be non-empty, page-aligned, and small enough for its
 * size to be reported through read() as an unsigned long.
 */
static int __init open_dice_probe(struct platform_device *pdev)
{
	static unsigned int dev_idx;	/* next N for "open-diceN" */
	struct device *dev = &pdev->dev;
	struct reserved_mem *rmem;
	struct open_dice_drvdata *drvdata;
	int ret;

	rmem = of_reserved_mem_lookup(dev->of_node);
	if (!rmem) {
		dev_err(dev, "failed to lookup reserved memory\n");
		return -EINVAL;
	}

	if (!rmem->size || (rmem->size > ULONG_MAX)) {
		dev_err(dev, "invalid memory region size\n");
		return -EINVAL;
	}

	if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) {
		dev_err(dev, "memory region must be page-aligned\n");
		return -EINVAL;
	}

	/* devm allocation: freed automatically when the device goes away. */
	drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL);
	if (!drvdata)
		return -ENOMEM;

	*drvdata = (struct open_dice_drvdata){
		.lock = __SPIN_LOCK_UNLOCKED(drvdata->lock),
		.rmem = rmem,
		.misc = (struct miscdevice){
			.parent	= dev,
			.name	= drvdata->name,
			.minor	= MISC_DYNAMIC_MINOR,
			.fops	= &open_dice_fops,
			.mode	= 0600,
		},
	};

	/* Index overflow check not needed, misc_register() will fail. */
	snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++);

	ret = misc_register(&drvdata->misc);
	if (ret) {
		dev_err(dev, "failed to register misc device '%s': %d\n",
			drvdata->name, ret);
		return ret;
	}

	platform_set_drvdata(pdev, drvdata);
	return 0;
}
/* Unregister the misc device; drvdata itself is devm-managed. */
static int open_dice_remove(struct platform_device *pdev)
{
	struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev);

	misc_deregister(&drvdata->misc);
	return 0;
}
/* Devicetree match: reserved-memory nodes compatible "google,open-dice". */
static const struct of_device_id open_dice_of_match[] = {
	{ .compatible = "google,open-dice" },
	{},
};

static struct platform_driver open_dice_driver = {
	.remove = open_dice_remove,
	.driver = {
		.name = DRIVER_NAME,
		.of_match_table = open_dice_of_match,
	},
};

/*
 * Registered with module_platform_driver_probe(), so the __init probe
 * only binds to devices present at driver-registration time.
 */
module_platform_driver_probe(open_dice_driver, open_dice_probe);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("David Brazdil <dbrazdil@google.com>");

View File

@@ -511,6 +511,7 @@ static const struct of_device_id reserved_mem_matches[] = {
{ .compatible = "qcom,cmd-db" },
{ .compatible = "ramoops" },
{ .compatible = "nvmem-rmem" },
{ .compatible = "google,open-dice" },
{}
};

View File

@@ -380,8 +380,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
void kvm_vgic_load(struct kvm_vcpu *vcpu);
void kvm_vgic_put(struct kvm_vcpu *vcpu);
void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu);
void kvm_vgic_put(struct kvm_vcpu *vcpu, bool blocking);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.initialized)

View File

@@ -107,6 +107,13 @@
/* KVM "vendor specific" services */
#define ARM_SMCCC_KVM_FUNC_FEATURES 0
#define ARM_SMCCC_KVM_FUNC_PTP 1
#define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2
#define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3
#define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4
#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO 5
#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL 6
#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP 7
#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP 8
#define ARM_SMCCC_KVM_FUNC_FEATURES_2 127
#define ARM_SMCCC_KVM_NUM_FUNCS 128
@@ -129,10 +136,52 @@
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_PTP)
#define ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_HYP_MEMINFO)
#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MEM_SHARE)
#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MEM_UNSHARE)
/* ptp_kvm counter type ID */
#define KVM_PTP_VIRT_COUNTER 0
#define KVM_PTP_PHYS_COUNTER 1
#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO)
#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL)
#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP)
#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP)
/* Paravirtualised time calls (defined by ARM DEN0057A) */
#define ARM_SMCCC_HV_PV_TIME_FEATURES \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \

View File

@@ -21,6 +21,8 @@ void __ioread32_copy(void *to, const void __iomem *from, size_t count);
void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
#ifdef CONFIG_MMU
void ioremap_phys_range_hook(phys_addr_t phys_addr, size_t size, pgprot_t prot);
void iounmap_phys_range_hook(phys_addr_t phys_addr, size_t size);
int ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot);
#else

View File

@@ -863,6 +863,12 @@ struct kvm_ppc_resize_hpt {
#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
#define KVM_VM_TYPE_ARM_PROTECTED (1UL << 31)
#define KVM_VM_TYPE_MASK (KVM_VM_TYPE_ARM_IPA_SIZE_MASK | \
KVM_VM_TYPE_ARM_PROTECTED)
/*
* ioctls for /dev/kvm fds:
*/
@@ -1112,6 +1118,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_BINARY_STATS_FD 203
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_ARM_PROTECTED_VM 0xffbadab1
#ifdef KVM_CAP_IRQ_ROUTING

View File

@@ -897,6 +897,11 @@ config IO_MAPPING
config SECRETMEM
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
# Some architectures want callbacks for all IO mappings in order to
# track the physical addresses that get used as devices.
config ARCH_HAS_IOREMAP_PHYS_HOOKS
bool
source "mm/damon/Kconfig"
endmenu

View File

@@ -38,6 +38,7 @@
#include <linux/pgtable.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -316,9 +317,14 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
{
int err;
err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
prot = pgprot_nx(prot);
err = vmap_range_noflush(addr, end, phys_addr, prot,
ioremap_max_page_shift);
flush_cache_vmap(addr, end);
if (IS_ENABLED(CONFIG_ARCH_HAS_IOREMAP_PHYS_HOOKS) && !err)
ioremap_phys_range_hook(phys_addr, end - addr, prot);
return err;
}
@@ -2608,6 +2614,10 @@ static void __vunmap(const void *addr, int deallocate_pages)
kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
if (IS_ENABLED(CONFIG_ARCH_HAS_IOREMAP_PHYS_HOOKS) &&
area->flags & VM_IOREMAP)
iounmap_phys_range_hook(area->phys_addr, get_vm_area_size(area));
vm_remove_mappings(area, deallocate_pages);
if (deallocate_pages) {