pkvm.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2020 - Google LLC
  4. * Author: Quentin Perret <qperret@google.com>
  5. */
  6. #include <linux/init.h>
  7. #include <linux/interval_tree_generic.h>
  8. #include <linux/kmemleak.h>
  9. #include <linux/kvm_host.h>
  10. #include <asm/kvm_mmu.h>
  11. #include <linux/memblock.h>
  12. #include <linux/mutex.h>
  13. #include <asm/kvm_pkvm.h>
  14. #include "hyp_constants.h"
  15. DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
  16. static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
  17. static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
  18. phys_addr_t hyp_mem_base;
  19. phys_addr_t hyp_mem_size;
  20. static int __init register_memblock_regions(void)
  21. {
  22. struct memblock_region *reg;
  23. for_each_mem_region(reg) {
  24. if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
  25. return -ENOMEM;
  26. hyp_memory[*hyp_memblock_nr_ptr] = *reg;
  27. (*hyp_memblock_nr_ptr)++;
  28. }
  29. return 0;
  30. }
  31. void __init kvm_hyp_reserve(void)
  32. {
  33. u64 hyp_mem_pages = 0;
  34. int ret;
  35. if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
  36. return;
  37. if (kvm_get_mode() != KVM_MODE_PROTECTED)
  38. return;
  39. ret = register_memblock_regions();
  40. if (ret) {
  41. *hyp_memblock_nr_ptr = 0;
  42. kvm_err("Failed to register hyp memblocks: %d\n", ret);
  43. return;
  44. }
  45. hyp_mem_pages += hyp_s1_pgtable_pages();
  46. hyp_mem_pages += host_s2_pgtable_pages();
  47. hyp_mem_pages += hyp_vm_table_pages();
  48. hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
  49. hyp_mem_pages += pkvm_selftest_pages();
  50. hyp_mem_pages += hyp_ffa_proxy_pages();
  51. /*
  52. * Try to allocate a PMD-aligned region to reduce TLB pressure once
  53. * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
  54. */
  55. hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
  56. hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
  57. PMD_SIZE);
  58. if (!hyp_mem_base)
  59. hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
  60. else
  61. hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
  62. if (!hyp_mem_base) {
  63. kvm_err("Failed to reserve hyp memory\n");
  64. return;
  65. }
  66. kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
  67. hyp_mem_base);
  68. }
  69. static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
  70. {
  71. if (pkvm_hyp_vm_is_created(kvm)) {
  72. WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
  73. kvm->arch.pkvm.handle));
  74. } else if (kvm->arch.pkvm.handle) {
  75. /*
  76. * The VM could have been reserved but hyp initialization has
  77. * failed. Make sure to unreserve it.
  78. */
  79. kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle);
  80. }
  81. kvm->arch.pkvm.handle = 0;
  82. kvm->arch.pkvm.is_created = false;
  83. free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
  84. free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
  85. }
  86. static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
  87. {
  88. size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
  89. pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
  90. void *hyp_vcpu;
  91. int ret;
  92. vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
  93. hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
  94. if (!hyp_vcpu)
  95. return -ENOMEM;
  96. ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
  97. if (!ret)
  98. vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
  99. else
  100. free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
  101. return ret;
  102. }
  103. /*
  104. * Allocates and donates memory for hypervisor VM structs at EL2.
  105. *
  106. * Allocates space for the VM state, which includes the hyp vm as well as
  107. * the hyp vcpus.
  108. *
  109. * Stores an opaque handler in the kvm struct for future reference.
  110. *
  111. * Return 0 on success, negative error code on failure.
  112. */
  113. static int __pkvm_create_hyp_vm(struct kvm *kvm)
  114. {
  115. size_t pgd_sz, hyp_vm_sz;
  116. void *pgd, *hyp_vm;
  117. int ret;
  118. if (kvm->created_vcpus < 1)
  119. return -EINVAL;
  120. pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr);
  121. /*
  122. * The PGD pages will be reclaimed using a hyp_memcache which implies
  123. * page granularity. So, use alloc_pages_exact() to get individual
  124. * refcounts.
  125. */
  126. pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
  127. if (!pgd)
  128. return -ENOMEM;
  129. /* Allocate memory to donate to hyp for vm and vcpu pointers. */
  130. hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
  131. size_mul(sizeof(void *),
  132. kvm->created_vcpus)));
  133. hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
  134. if (!hyp_vm) {
  135. ret = -ENOMEM;
  136. goto free_pgd;
  137. }
  138. /* Donate the VM memory to hyp and let hyp initialize it. */
  139. ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
  140. if (ret)
  141. goto free_vm;
  142. kvm->arch.pkvm.is_created = true;
  143. kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
  144. kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
  145. return 0;
  146. free_vm:
  147. free_pages_exact(hyp_vm, hyp_vm_sz);
  148. free_pgd:
  149. free_pages_exact(pgd, pgd_sz);
  150. return ret;
  151. }
  152. bool pkvm_hyp_vm_is_created(struct kvm *kvm)
  153. {
  154. return READ_ONCE(kvm->arch.pkvm.is_created);
  155. }
  156. int pkvm_create_hyp_vm(struct kvm *kvm)
  157. {
  158. int ret = 0;
  159. mutex_lock(&kvm->arch.config_lock);
  160. if (!pkvm_hyp_vm_is_created(kvm))
  161. ret = __pkvm_create_hyp_vm(kvm);
  162. mutex_unlock(&kvm->arch.config_lock);
  163. return ret;
  164. }
  165. int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
  166. {
  167. int ret = 0;
  168. mutex_lock(&vcpu->kvm->arch.config_lock);
  169. if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
  170. ret = __pkvm_create_hyp_vcpu(vcpu);
  171. mutex_unlock(&vcpu->kvm->arch.config_lock);
  172. return ret;
  173. }
  174. void pkvm_destroy_hyp_vm(struct kvm *kvm)
  175. {
  176. mutex_lock(&kvm->arch.config_lock);
  177. __pkvm_destroy_hyp_vm(kvm);
  178. mutex_unlock(&kvm->arch.config_lock);
  179. }
  180. int pkvm_init_host_vm(struct kvm *kvm)
  181. {
  182. int ret;
  183. if (pkvm_hyp_vm_is_created(kvm))
  184. return -EINVAL;
  185. /* VM is already reserved, no need to proceed. */
  186. if (kvm->arch.pkvm.handle)
  187. return 0;
  188. /* Reserve the VM in hyp and obtain a hyp handle for the VM. */
  189. ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm);
  190. if (ret < 0)
  191. return ret;
  192. kvm->arch.pkvm.handle = ret;
  193. return 0;
  194. }
  195. static void __init _kvm_host_prot_finalize(void *arg)
  196. {
  197. int *err = arg;
  198. if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
  199. WRITE_ONCE(*err, -EINVAL);
  200. }
  201. static int __init pkvm_drop_host_privileges(void)
  202. {
  203. int ret = 0;
  204. /*
  205. * Flip the static key upfront as that may no longer be possible
  206. * once the host stage 2 is installed.
  207. */
  208. static_branch_enable(&kvm_protected_mode_initialized);
  209. on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
  210. return ret;
  211. }
  212. static int __init finalize_pkvm(void)
  213. {
  214. int ret;
  215. if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
  216. return 0;
  217. /*
  218. * Exclude HYP sections from kmemleak so that they don't get peeked
  219. * at, which would end badly once inaccessible.
  220. */
  221. kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
  222. kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
  223. kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
  224. kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
  225. ret = pkvm_drop_host_privileges();
  226. if (ret)
  227. pr_err("Failed to finalize Hyp protection: %d\n", ret);
  228. return ret;
  229. }
  230. device_initcall_sync(finalize_pkvm);
  231. static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
  232. {
  233. return m->gfn * PAGE_SIZE;
  234. }
  235. static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
  236. {
  237. return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
  238. }
  239. INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
  240. __pkvm_mapping_start, __pkvm_mapping_end, static,
  241. pkvm_mapping);
  /*
   * Iterate over every pkvm_mapping of __pgt that overlaps [__start, __end),
   * storing the current one in __map.
   *
   * __tmp is advanced to iter_next(__map) *before* entering the body of the
   * loop, which allows the body to remove and free __map inline.
   *
   * __start and __end are expanded in the initial lookup and again on every
   * iteration, so they must be free of side effects. All arguments are
   * parenthesized so that arbitrary expressions can be passed safely.
   */
  #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
  	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings, \
  								  (__start), (__end) - 1); \
  	     __tmp && ({ \
  			(__map) = __tmp; \
  			__tmp = pkvm_mapping_iter_next((__map), (__start), (__end) - 1); \
  			true; \
  	     }); \
  	    )
  255. int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
  256. struct kvm_pgtable_mm_ops *mm_ops)
  257. {
  258. pgt->pkvm_mappings = RB_ROOT_CACHED;
  259. pgt->mmu = mmu;
  260. return 0;
  261. }
  262. static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
  263. {
  264. struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
  265. pkvm_handle_t handle = kvm->arch.pkvm.handle;
  266. struct pkvm_mapping *mapping;
  267. int ret;
  268. if (!handle)
  269. return 0;
  270. for_each_mapping_in_range_safe(pgt, start, end, mapping) {
  271. ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
  272. mapping->nr_pages);
  273. if (WARN_ON(ret))
  274. return ret;
  275. pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
  276. kfree(mapping);
  277. }
  278. return 0;
  279. }
  280. void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
  281. u64 addr, u64 size)
  282. {
  283. __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
  284. }
  285. void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
  286. {
  287. /* Expected to be called after all pKVM mappings have been released. */
  288. WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
  289. }
  290. int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
  291. u64 phys, enum kvm_pgtable_prot prot,
  292. void *mc, enum kvm_pgtable_walk_flags flags)
  293. {
  294. struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
  295. struct pkvm_mapping *mapping = NULL;
  296. struct kvm_hyp_memcache *cache = mc;
  297. u64 gfn = addr >> PAGE_SHIFT;
  298. u64 pfn = phys >> PAGE_SHIFT;
  299. int ret;
  300. if (size != PAGE_SIZE && size != PMD_SIZE)
  301. return -EINVAL;
  302. lockdep_assert_held_write(&kvm->mmu_lock);
  303. /*
  304. * Calling stage2_map() on top of existing mappings is either happening because of a race
  305. * with another vCPU, or because we're changing between page and block mappings. As per
  306. * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
  307. */
  308. mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
  309. if (mapping) {
  310. if (size == (mapping->nr_pages * PAGE_SIZE))
  311. return -EAGAIN;
  312. /* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
  313. ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
  314. if (ret)
  315. return ret;
  316. mapping = NULL;
  317. }
  318. ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
  319. if (WARN_ON(ret))
  320. return ret;
  321. swap(mapping, cache->mapping);
  322. mapping->gfn = gfn;
  323. mapping->pfn = pfn;
  324. mapping->nr_pages = size / PAGE_SIZE;
  325. pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
  326. return ret;
  327. }
  328. int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
  329. {
  330. lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
  331. return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
  332. }
  333. int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
  334. {
  335. struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
  336. pkvm_handle_t handle = kvm->arch.pkvm.handle;
  337. struct pkvm_mapping *mapping;
  338. int ret = 0;
  339. lockdep_assert_held(&kvm->mmu_lock);
  340. for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
  341. ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
  342. mapping->nr_pages);
  343. if (WARN_ON(ret))
  344. break;
  345. }
  346. return ret;
  347. }
  348. int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
  349. {
  350. struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
  351. struct pkvm_mapping *mapping;
  352. lockdep_assert_held(&kvm->mmu_lock);
  353. for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
  354. __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
  355. PAGE_SIZE * mapping->nr_pages);
  356. return 0;
  357. }
  358. bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
  359. {
  360. struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
  361. pkvm_handle_t handle = kvm->arch.pkvm.handle;
  362. struct pkvm_mapping *mapping;
  363. bool young = false;
  364. lockdep_assert_held(&kvm->mmu_lock);
  365. for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
  366. young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
  367. mapping->nr_pages, mkold);
  368. return young;
  369. }
  370. int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
  371. enum kvm_pgtable_walk_flags flags)
  372. {
  373. return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
  374. }
  375. void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
  376. enum kvm_pgtable_walk_flags flags)
  377. {
  378. WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
  379. }
  380. void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
  381. {
  382. WARN_ON_ONCE(1);
  383. }
  384. kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
  385. enum kvm_pgtable_prot prot, void *mc, bool force_pte)
  386. {
  387. WARN_ON_ONCE(1);
  388. return NULL;
  389. }
  390. int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
  391. struct kvm_mmu_memory_cache *mc)
  392. {
  393. WARN_ON_ONCE(1);
  394. return -EINVAL;
  395. }