ANDROID: KVM: arm64: Initialize shadow vcpus with separate hypercalls

This is done as the first step towards donating memory per vcpu in future patches without having to spend potentially too much time in one hypercall. Moreover, this has the nice effect of removing the need for stashing the host vcpus in the memory donated for the pgd. Bug: 232070947 Signed-off-by: Fuad Tabba <tabba@google.com> Change-Id: I491c358fa29dd62ffc45347d6288696c846d5fc3
2022-10-10 09:52:27 +01:00
parent 31d3b09cb7
commit 76094a9979
5 changed files with 109 additions and 73 deletions
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -76,6 +76,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
 	__KVM_HOST_SMCCC_FUNC___pkvm_init_shadow,
+	__KVM_HOST_SMCCC_FUNC___pkvm_init_shadow_vcpu,
 	__KVM_HOST_SMCCC_FUNC___pkvm_teardown_shadow,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -42,6 +42,12 @@ struct kvm_shadow_vm {
 	/* The total size of the donated shadow area. */
 	size_t shadow_area_size;

+	/*
+	 * The number of vcpus initialized and ready to run in the shadow vm.
+	 * Modifying this is protected by shadow_lock.
+	 */
+	unsigned int nr_vcpus;
+
 	struct kvm_arch arch;
 	struct kvm_pgtable pgt;
 	struct kvm_pgtable_mm_ops mm_ops;
@@ -65,6 +71,7 @@ extern phys_addr_t pvmfw_size;

 void hyp_shadow_table_init(void *tbl);
 int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size, void *pgd);
+int __pkvm_init_shadow_vcpu(unsigned int shadow_handle, struct kvm_vcpu *host_vcpu);
 int __pkvm_teardown_shadow(int shadow_handle);
 struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx);
 void put_shadow_vcpu(struct kvm_vcpu *vcpu);
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1006,6 +1006,14 @@ static void handle___pkvm_init_shadow(struct kvm_cpu_context *host_ctxt)
 						   shadow_size, pgd);
 }

+static void handle___pkvm_init_shadow_vcpu(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(unsigned int, shadow_handle, host_ctxt, 1);
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
+
+	cpu_reg(host_ctxt, 1) =	__pkvm_init_shadow_vcpu(shadow_handle, host_vcpu);
+}
+
 static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(int, shadow_handle, host_ctxt, 1);
@@ -1079,6 +1087,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
 	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
 	HANDLE_FUNC(__pkvm_init_shadow),
+	HANDLE_FUNC(__pkvm_init_shadow_vcpu),
 	HANDLE_FUNC(__pkvm_teardown_shadow),
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -276,7 +276,7 @@ struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx)

 	hyp_spin_lock(&shadow_lock);
 	vm = find_shadow_by_handle(shadow_handle);
-	if (!vm || vm->created_vcpus <= vcpu_idx)
+	if (!vm || vm->nr_vcpus <= vcpu_idx)
 		goto unlock;
 	vcpu = &vm->shadow_vcpus[vcpu_idx].vcpu;

@@ -390,28 +390,6 @@ static void unpin_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpu
 		unpin_host_vcpu(&shadow_vcpus[i]);
 }

-static int set_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpus,
-			  struct kvm_vcpu **vcpu_array, size_t vcpu_array_size)
-{
-	int i;
-
-	if (vcpu_array_size < sizeof(*vcpu_array) * nr_vcpus)
-		return -EINVAL;
-
-	for (i = 0; i < nr_vcpus; i++) {
-		struct kvm_vcpu *host_vcpu = kern_hyp_va(vcpu_array[i]);
-
-		if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) {
-			unpin_host_vcpus(shadow_vcpus, i);
-			return -EBUSY;
-		}
-
-		shadow_vcpus[i].vcpu.arch.pkvm.host_vcpu = host_vcpu;
-	}
-
-	return 0;
-}
-
 static int init_ptrauth(struct kvm_vcpu *shadow_vcpu)
 {
 	int ret = 0;
@@ -431,23 +409,33 @@ static void init_shadow_vm(struct kvm *kvm, struct kvm_shadow_vm *vm,
 }

 static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
+			    struct kvm_vcpu *host_vcpu,
 			    struct kvm_shadow_vm *vm, int vcpu_idx)
 {
 	struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu;
-	struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu;
 	int ret;

+	host_vcpu = kern_hyp_va(host_vcpu);
+	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
+		return -EBUSY;
+
+	if (host_vcpu->vcpu_idx != vcpu_idx) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	shadow_vcpu->arch.pkvm.host_vcpu = host_vcpu;
 	shadow_vcpu->kvm = vm->host_kvm;
 	shadow_vcpu->vcpu_id = host_vcpu->vcpu_id;
 	shadow_vcpu->vcpu_idx = vcpu_idx;

 	ret = copy_features(shadow_vcpu, host_vcpu);
 	if (ret)
-		return ret;
+		goto done;

 	ret = init_ptrauth(shadow_vcpu);
 	if (ret)
-		return ret;
+		goto done;

 	if (test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) {
 		size_t sve_state_size;
@@ -466,7 +454,8 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 			clear_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features);
 			shadow_vcpu->arch.sve_state = NULL;
 			shadow_vcpu->arch.sve_max_vl = 0;
-			return -EINVAL;
+			ret = -EINVAL;
+			goto done;
 		}
 	}

@@ -485,8 +474,10 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 		shadow_vcpu->arch.pkvm.power_state =
 			PSCI_0_2_AFFINITY_LEVEL_OFF;
 	} else if (pvm_has_pvmfw(vm)) {
-		if (vm->pvmfw_entry_vcpu)
-			return -EINVAL;
+		if (vm->pvmfw_entry_vcpu) {
+			ret = -EINVAL;
+			goto done;
+		}

 		vm->pvmfw_entry_vcpu = shadow_vcpu;
 		shadow_vcpu->arch.reset_state.reset = true;
@@ -503,24 +494,11 @@ static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
 	}

-	return 0;
-}
+done:
+	if (ret)
+		unpin_host_vcpu(shadow_state);

-static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm,
-			       int nr_vcpus)
-{
-	int i;
-
-	init_shadow_vm(kvm, vm, nr_vcpus);
-
-	for (i = 0; i < nr_vcpus; i++) {
-		int ret = init_shadow_vcpu(&vm->shadow_vcpus[i], vm, i);
-
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return ret;
 }

 static bool __exists_shadow(struct kvm *host_kvm)
@@ -663,8 +641,6 @@ static void drain_shadow_vcpus(struct shadow_vcpu_state *shadow_vcpus,
 * 		Must be a multiple of the page size.
 * pgd: The host va of the area being donated for the stage-2 PGD for the VM.
 * 	Must be page aligned. Its size is implied by the VM's VTCR.
- * Note: An array to the host KVM VCPUs (host VA) is passed via the pgd, as to
- * 	 not to be dependent on how the VCPU's are layed out in struct kvm.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
@@ -716,19 +692,13 @@ int __pkvm_init_shadow(struct kvm *kvm,
 	if (ret)
 		goto err_remove_mappings;

-	ret = set_host_vcpus(vm->shadow_vcpus, nr_vcpus, pgd, pgd_size);
-	if (ret)
-		goto err_remove_pgd;
-
-	ret = init_shadow_structs(kvm, vm, nr_vcpus);
-	if (ret < 0)
-		goto err_unpin_host_vcpus;
+	init_shadow_vm(kvm, vm, nr_vcpus);

 	/* Add the entry to the shadow table. */
 	hyp_spin_lock(&shadow_lock);
 	ret = insert_shadow_table(kvm, vm, shadow_size);
 	if (ret < 0)
-		goto err_unlock_unpin_host_vcpus;
+		goto err_unlock;

 	ret = kvm_guest_prepare_stage2(vm, pgd);
 	if (ret)
@@ -739,24 +709,60 @@ int __pkvm_init_shadow(struct kvm *kvm,

 err_remove_shadow_table:
 	remove_shadow_table(vm->shadow_handle);
-err_unlock_unpin_host_vcpus:
+err_unlock:
 	hyp_spin_unlock(&shadow_lock);
-err_unpin_host_vcpus:
-	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);
-err_remove_pgd:
 	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pgd_pages));
-
 err_remove_mappings:
 	/* Clear the donated shadow memory on failure to avoid data leaks. */
 	memset(vm, 0, shadow_size);
 	WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa),
 				       shadow_size >> PAGE_SHIFT));
-
 err:
 	hyp_unpin_shared_mem(kvm, kvm + 1);
 	return ret;
 }

+/*
+ * Initialize the protected vcpu state shadow copy in host-donated memory.
+ *
+ * shadow_handle: The handle for the protected vm.
+ * host_vcpu: A pointer to the corresponding host vcpu (host va).
+ *
+ * Return 0 on success, negative error code on failure.
+ */
+int __pkvm_init_shadow_vcpu(unsigned int shadow_handle,
+			    struct kvm_vcpu *host_vcpu)
+{
+	struct kvm_shadow_vm *vm;
+	struct shadow_vcpu_state *shadow_state;
+	unsigned int idx;
+	int ret;
+
+	hyp_spin_lock(&shadow_lock);
+
+	vm = find_shadow_by_handle(shadow_handle);
+	if (!vm) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	idx = vm->nr_vcpus;
+	if (idx >= vm->created_vcpus) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	shadow_state = &vm->shadow_vcpus[idx];
+	ret = init_shadow_vcpu(shadow_state, host_vcpu, vm, idx);
+	if (ret)
+		goto unlock;
+
+	vm->nr_vcpus++;
+unlock:
+	hyp_spin_unlock(&shadow_lock);
+	return ret;
+}
+
 static void teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr,
 				    size_t size)
 {
@@ -777,6 +783,7 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	struct kvm_hyp_memcache *mc;
 	struct kvm_shadow_vm *vm;
 	struct kvm *host_kvm;
+	unsigned int nr_vcpus;
 	int err;
 	int i;

@@ -794,6 +801,7 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	}

 	host_kvm = vm->host_kvm;
+	nr_vcpus = vm->nr_vcpus;

 	/*
 	 * Clear the tracking for last_loaded_vcpu for all cpus for this vm in
@@ -816,8 +824,8 @@ int __pkvm_teardown_shadow(int shadow_handle)
 	/* Reclaim guest pages, and page-table pages */
 	mc = &host_kvm->arch.pkvm.teardown_mc;
 	reclaim_guest_pages(vm, mc);
-	drain_shadow_vcpus(vm->shadow_vcpus, vm->created_vcpus, mc);
-	unpin_host_vcpus(vm->shadow_vcpus, vm->created_vcpus);
+	drain_shadow_vcpus(vm->shadow_vcpus, nr_vcpus, mc);
+	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);

 	teardown_donated_memory(mc, vm, vm->shadow_area_size);

@@ -942,7 +950,7 @@ struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr

 	mpidr &= MPIDR_HWID_BITMASK;

-	for (i = 0; i < vm->created_vcpus; i++) {
+	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
 		vcpu = vm->vcpus[i];

 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
@@ -1059,7 +1067,7 @@ static bool pvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	 * then if at least one is PENDING_ON then return PENDING_ON.
 	 * Otherwise, return OFF.
 	 */
-	for (i = 0; i < vm->created_vcpus; i++) {
+	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
 		tmp = vm->vcpus[i];
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);

--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -117,7 +117,7 @@ void __init kvm_hyp_reserve(void)
 */
 static int __create_el2_shadow(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu, **vcpu_array;
+	struct kvm_vcpu *vcpu;
 	size_t pgd_sz, shadow_sz;
 	void *pgd, *shadow_addr;
 	unsigned long idx;
@@ -146,12 +146,6 @@ static int __create_el2_shadow(struct kvm *kvm)
 		goto free_pgd;
 	}

-	/* Stash the vcpu pointers into the PGD */
-	BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64)));
-	vcpu_array = pgd;
-	kvm_for_each_vcpu(idx, vcpu, kvm)
-		vcpu_array[idx] = vcpu;
-
 	/* Donate the shadow memory to hyp and let hyp initialize it. */
 	ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz,
 				pgd);
@@ -163,8 +157,25 @@ static int __create_el2_shadow(struct kvm *kvm)
 	/* Store the shadow handle given by hyp for future call reference. */
 	kvm->arch.pkvm.shadow_handle = shadow_handle;

+	/* Initialize the shadow vcpus.  */
+	kvm_for_each_vcpu (idx, vcpu, kvm) {
+		/* Indexing of the vcpus to be sequential starting at 0. */
+		if (WARN_ON(vcpu->vcpu_idx != idx)) {
+			ret = -EINVAL;
+			goto destroy_vm;
+		}
+
+		ret = kvm_call_hyp_nvhe(__pkvm_init_shadow_vcpu, shadow_handle,
+					vcpu);
+		if (ret)
+			goto destroy_vm;
+	}
+
 	return 0;

+destroy_vm:
+	kvm_shadow_destroy(kvm);
+	return ret;
 free_shadow:
 	free_pages_exact(shadow_addr, shadow_sz);
 free_pgd: