kvm_pgtable.h 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2020 Google LLC
  4. * Author: Will Deacon <will@kernel.org>
  5. */
  6. #ifndef __ARM64_KVM_PGTABLE_H__
  7. #define __ARM64_KVM_PGTABLE_H__
  8. #include <linux/bits.h>
  9. #include <linux/kvm_host.h>
  10. #include <linux/types.h>
  11. #define KVM_PGTABLE_FIRST_LEVEL -1
  12. #define KVM_PGTABLE_LAST_LEVEL 3
  13. /*
  14. * The largest supported block sizes for KVM (no 52-bit PA support):
  15. * - 4K (level 1): 1GB
  16. * - 16K (level 2): 32MB
  17. * - 64K (level 2): 512MB
  18. */
  19. #ifdef CONFIG_ARM64_4K_PAGES
  20. #define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
  21. #else
  22. #define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
  23. #endif
  24. #define kvm_lpa2_is_enabled() system_supports_lpa2()
  25. static inline u64 kvm_get_parange_max(void)
  26. {
  27. if (kvm_lpa2_is_enabled() ||
  28. (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
  29. return ID_AA64MMFR0_EL1_PARANGE_52;
  30. else
  31. return ID_AA64MMFR0_EL1_PARANGE_48;
  32. }
  33. static inline u64 kvm_get_parange(u64 mmfr0)
  34. {
  35. u64 parange_max = kvm_get_parange_max();
  36. u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
  37. ID_AA64MMFR0_EL1_PARANGE_SHIFT);
  38. if (parange > parange_max)
  39. parange = parange_max;
  40. return parange;
  41. }
  42. typedef u64 kvm_pte_t;
  43. #define KVM_PTE_VALID BIT(0)
  44. #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
  45. #define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
  46. #define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
  47. #define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
  48. #define KVM_PHYS_INVALID (-1ULL)
  49. #define KVM_PTE_TYPE BIT(1)
  50. #define KVM_PTE_TYPE_BLOCK 0
  51. #define KVM_PTE_TYPE_PAGE 1
  52. #define KVM_PTE_TYPE_TABLE 1
  53. #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
  54. #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
  55. #define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
  56. #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
  57. ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
  58. #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
  59. ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
  60. #define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
  61. #define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
  62. #define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
  63. #define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
  64. #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
  65. #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
  66. #define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
  67. #define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
  68. #define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
  69. #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
  70. #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
  71. #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
  72. #define KVM_PTE_LEAF_ATTR_HI_S1_UXN BIT(54)
  73. #define KVM_PTE_LEAF_ATTR_HI_S1_PXN BIT(53)
  74. #define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
  75. #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
  76. #define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
  77. KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
  78. KVM_PTE_LEAF_ATTR_HI_S2_XN)
  79. #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
  80. #define KVM_MAX_OWNER_ID 1
  81. /*
  82. * Used to indicate a pte for which a 'break-before-make' sequence is in
  83. * progress.
  84. */
  85. #define KVM_INVALID_PTE_LOCKED BIT(10)
  86. static inline bool kvm_pte_valid(kvm_pte_t pte)
  87. {
  88. return pte & KVM_PTE_VALID;
  89. }
  90. static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
  91. {
  92. u64 pa;
  93. if (kvm_lpa2_is_enabled()) {
  94. pa = pte & KVM_PTE_ADDR_MASK_LPA2;
  95. pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
  96. } else {
  97. pa = pte & KVM_PTE_ADDR_MASK;
  98. if (PAGE_SHIFT == 16)
  99. pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
  100. }
  101. return pa;
  102. }
  103. static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
  104. {
  105. kvm_pte_t pte;
  106. if (kvm_lpa2_is_enabled()) {
  107. pte = pa & KVM_PTE_ADDR_MASK_LPA2;
  108. pa &= GENMASK(51, 50);
  109. pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
  110. } else {
  111. pte = pa & KVM_PTE_ADDR_MASK;
  112. if (PAGE_SHIFT == 16) {
  113. pa &= GENMASK(51, 48);
  114. pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
  115. }
  116. }
  117. return pte;
  118. }
  119. static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
  120. {
  121. return __phys_to_pfn(kvm_pte_to_phys(pte));
  122. }
  123. static inline u64 kvm_granule_shift(s8 level)
  124. {
  125. /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
  126. return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
  127. }
  128. static inline u64 kvm_granule_size(s8 level)
  129. {
  130. return BIT(kvm_granule_shift(level));
  131. }
  132. static inline bool kvm_level_supports_block_mapping(s8 level)
  133. {
  134. return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
  135. }
  136. static inline u32 kvm_supported_block_sizes(void)
  137. {
  138. s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
  139. u32 r = 0;
  140. for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
  141. r |= BIT(kvm_granule_shift(level));
  142. return r;
  143. }
  144. static inline bool kvm_is_block_size_supported(u64 size)
  145. {
  146. bool is_power_of_two = IS_ALIGNED(size, size);
  147. return is_power_of_two && (size & kvm_supported_block_sizes());
  148. }
  149. /**
  150. * struct kvm_pgtable_mm_ops - Memory management callbacks.
  151. * @zalloc_page: Allocate a single zeroed memory page.
  152. * The @arg parameter can be used by the walker
  153. * to pass a memcache. The initial refcount of
  154. * the page is 1.
  155. * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
  156. * The @size parameter is in bytes, and is rounded
  157. * up to the next page boundary. The resulting
  158. * allocation is physically contiguous.
  159. * @free_pages_exact: Free an exact number of memory pages previously
  160. * allocated by zalloc_pages_exact.
  161. * @free_unlinked_table: Free an unlinked paging structure by unlinking and
  162. * dropping references.
  163. * @get_page: Increment the refcount on a page.
  164. * @put_page: Decrement the refcount on a page. When the
  165. * refcount reaches 0 the page is automatically
  166. * freed.
  167. * @page_count: Return the refcount of a page.
  168. * @phys_to_virt: Convert a physical address into a virtual
  169. * address mapped in the current context.
  170. * @virt_to_phys: Convert a virtual address mapped in the current
  171. * context into a physical address.
  172. * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
  173. * for the specified memory address range.
  174. * @icache_inval_pou: Invalidate the instruction cache to the PoU
  175. * for the specified memory address range.
  176. */
  177. struct kvm_pgtable_mm_ops {
  /* Allocation and freeing of page-table memory. */
  178. void* (*zalloc_page)(void *arg);
  179. void* (*zalloc_pages_exact)(size_t size);
  180. void (*free_pages_exact)(void *addr, size_t size);
  181. void (*free_unlinked_table)(void *addr, s8 level);
  /* Page reference counting. */
  182. void (*get_page)(void *addr);
  183. void (*put_page)(void *addr);
  184. int (*page_count)(void *addr);
  /* Physical <-> virtual address conversion. */
  185. void* (*phys_to_virt)(phys_addr_t phys);
  186. phys_addr_t (*virt_to_phys)(void *addr);
  /* Cache maintenance over an address range. */
  187. void (*dcache_clean_inval_poc)(void *addr, size_t size);
  188. void (*icache_inval_pou)(void *addr, size_t size);
  189. };
  190. /**
  191. * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
  192. * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings.
  193. * @KVM_PGTABLE_S2_AS_S1: Final memory attributes are that of Stage-1.
  194. */
  195. enum kvm_pgtable_stage2_flags {
  /* Each flag occupies its own bit (bit 0 and bit 1). */
  196. KVM_PGTABLE_S2_IDMAP = BIT(0),
  197. KVM_PGTABLE_S2_AS_S1 = BIT(1),
  198. };
  199. /**
  200. * enum kvm_pgtable_prot - Page-table permissions and attributes.
  201. * @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
  202. * @KVM_PGTABLE_PROT_PX: Privileged execute permission.
  203. * @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
  204. * @KVM_PGTABLE_PROT_W: Write permission.
  205. * @KVM_PGTABLE_PROT_R: Read permission.
  206. * @KVM_PGTABLE_PROT_DEVICE: Device attributes.
  207. * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
  208. * @KVM_PGTABLE_PROT_SW0: Software bit 0.
  209. * @KVM_PGTABLE_PROT_SW1: Software bit 1.
  210. * @KVM_PGTABLE_PROT_SW2: Software bit 2.
  211. * @KVM_PGTABLE_PROT_SW3: Software bit 3.
  212. */
  213. enum kvm_pgtable_prot {
  214. KVM_PGTABLE_PROT_PX = BIT(0),
  215. KVM_PGTABLE_PROT_UX = BIT(1),
  /* Composite value: both execute permissions OR-ed together. */
  216. KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
  217. KVM_PGTABLE_PROT_UX,
  218. KVM_PGTABLE_PROT_W = BIT(2),
  219. KVM_PGTABLE_PROT_R = BIT(3),
  220. KVM_PGTABLE_PROT_DEVICE = BIT(4),
  221. KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
  /*
   * Software bits use bit positions 55..58, the same positions covered by
   * KVM_PTE_LEAF_ATTR_HI_SW (GENMASK(58, 55)).
   */
  222. KVM_PGTABLE_PROT_SW0 = BIT(55),
  223. KVM_PGTABLE_PROT_SW1 = BIT(56),
  224. KVM_PGTABLE_PROT_SW2 = BIT(57),
  225. KVM_PGTABLE_PROT_SW3 = BIT(58),
  226. };
  227. #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
  228. #define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
  229. #define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX
  230. #define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW
  231. #define PAGE_HYP KVM_PGTABLE_PROT_RW
  232. #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
  233. #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
  234. #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
  235. typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
  236. enum kvm_pgtable_prot prot);
  237. /**
  238. * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
  239. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
  240. * entries.
  241. * @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
  242. * children.
  243. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
  244. * children.
  245. * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
  246. * with other software walkers.
  247. * @KVM_PGTABLE_WALK_IGNORE_EAGAIN: Don't terminate the walk early if
  248. * the walker returns -EAGAIN.
  249. * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
  250. * without Break-before-make's
  251. * TLB invalidation.
  252. * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
  253. * without Cache maintenance
  254. * operations required.
  255. */
  256. enum kvm_pgtable_walk_flags {
  /* Each flag occupies its own bit (bits 0..6). */
  257. KVM_PGTABLE_WALK_LEAF = BIT(0),
  258. KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
  259. KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
  /*
   * Tested by kvm_pgtable_walk_shared() and by
   * kvm_pgtable_walk_begin()/kvm_pgtable_walk_end() in this header.
   */
  260. KVM_PGTABLE_WALK_SHARED = BIT(3),
  261. KVM_PGTABLE_WALK_IGNORE_EAGAIN = BIT(4),
  262. KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
  263. KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
  264. };
  265. struct kvm_pgtable_visit_ctx {
  /*
   * Context handed (as a const pointer) to kvm_pgtable_visitor_fn_t
   * callbacks. The per-field notes below are inferred from the field
   * names only -- NOTE(review): confirm against the walker implementation.
   */
  266. kvm_pte_t *ptep; /* presumably the entry being visited */
  267. kvm_pte_t old; /* presumably the value read from *ptep */
  268. void *arg; /* presumably the walker's @arg */
  269. struct kvm_pgtable_mm_ops *mm_ops;
  270. u64 start; /* presumably the start of the walked range */
  271. u64 addr; /* presumably the address of the current entry */
  272. u64 end; /* presumably the end of the walked range */
  273. s8 level; /* presumably the level of the current entry */
  /* Tested for KVM_PGTABLE_WALK_SHARED by kvm_pgtable_walk_shared(). */
  274. enum kvm_pgtable_walk_flags flags;
  275. };
  276. typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
  277. enum kvm_pgtable_walk_flags visit);
  278. static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
  279. {
  280. return ctx->flags & KVM_PGTABLE_WALK_SHARED;
  281. }
  282. /**
  283. * struct kvm_pgtable_walker - Hook into a page-table walk.
  284. * @cb: Callback function to invoke during the walk.
  285. * @arg: Argument passed to the callback function.
  286. * @flags: Bitwise-OR of flags to identify the entry types on which to
  287. * invoke the callback function.
  288. */
  289. struct kvm_pgtable_walker {
  /* All members are const-qualified, so they can only be set at initialisation. */
  290. const kvm_pgtable_visitor_fn_t cb;
  291. void * const arg;
  292. const enum kvm_pgtable_walk_flags flags;
  293. };
  294. /*
  295. * RCU cannot be used in a non-kernel context such as the hyp. As such, page
  296. * table walkers used in hyp do not call into RCU and instead use other
  297. * synchronization mechanisms (such as a spinlock).
  298. */
  299. #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
  300. typedef kvm_pte_t *kvm_pteref_t;
  301. static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
  302. kvm_pteref_t pteref)
  303. {
  304. return pteref;
  305. }
  306. static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
  307. {
  308. return pteref;
  309. }
  310. static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
  311. {
  312. /*
  313. * Due to the lack of RCU (or a similar protection scheme), only
  314. * non-shared table walkers are allowed in the hypervisor.
  315. */
  316. if (walker->flags & KVM_PGTABLE_WALK_SHARED)
  317. return -EPERM;
  318. return 0;
  319. }
  320. static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
  321. static inline bool kvm_pgtable_walk_lock_held(void)
  322. {
  323. return true;
  324. }
  325. #else
  326. typedef kvm_pte_t __rcu *kvm_pteref_t;
  327. static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
  328. kvm_pteref_t pteref)
  329. {
  330. return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
  331. }
  332. static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
  333. {
  334. return rcu_dereference_raw(pteref);
  335. }
  336. static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
  337. {
  338. if (walker->flags & KVM_PGTABLE_WALK_SHARED)
  339. rcu_read_lock();
  340. return 0;
  341. }
  342. static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
  343. {
  344. if (walker->flags & KVM_PGTABLE_WALK_SHARED)
  345. rcu_read_unlock();
  346. }
  347. static inline bool kvm_pgtable_walk_lock_held(void)
  348. {
  349. return rcu_read_lock_held();
  350. }
  351. #endif
  352. /**
  353. * struct kvm_pgtable - KVM page-table.
  354. * @ia_bits: Maximum input address size, in bits.
  355. * @start_level: Level at which the page-table walk starts.
  356. * @pgd: Pointer to the first top-level entry of the page-table.
  357. * @mm_ops: Memory management callbacks.
  358. * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
  359. * @flags: Stage-2 page-table flags.
  360. * @force_pte_cb: Function that returns true if page level mappings must
  361. * be used instead of block mappings.
  362. */
  363. struct kvm_pgtable {
  /*
   * The rb-tree root and the page-table description below share storage
   * (anonymous union), so only one of the two views is meaningful for a
   * given instance. NOTE(review): presumably pkvm_mappings is the view
   * used when pKVM manages the guest stage-2 -- confirm against callers.
   */
  364. union {
  365. struct rb_root_cached pkvm_mappings;
  366. struct {
  367. u32 ia_bits;
  368. s8 start_level;
  369. kvm_pteref_t pgd;
  370. struct kvm_pgtable_mm_ops *mm_ops;
  371. /* Stage-2 only */
  372. enum kvm_pgtable_stage2_flags flags;
  373. kvm_pgtable_force_pte_cb_t force_pte_cb;
  374. };
  375. };
  /* Lives outside the union, so it is available with either view. */
  376. struct kvm_s2_mmu *mmu;
  377. };
  378. /**
  379. * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
  380. * @pgt: Uninitialised page-table structure to initialise.
  381. * @va_bits: Maximum virtual address bits.
  382. * @mm_ops: Memory management callbacks.
  383. *
  384. * Return: 0 on success, negative error code on failure.
  385. */
  386. int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
  387. struct kvm_pgtable_mm_ops *mm_ops);
  388. /**
  389. * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
  390. * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
  391. *
  392. * The page-table is assumed to be unreachable by any hardware walkers prior
  393. * to freeing and therefore no TLB invalidation is performed.
  394. */
  395. void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
  396. /**
  397. * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
  398. * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
  399. * @addr: Virtual address at which to place the mapping.
  400. * @size: Size of the mapping.
  401. * @phys: Physical address of the memory to map.
  402. * @prot: Permissions and attributes for the mapping.
  403. *
  404. * The offset of @addr within a page is ignored, @size is rounded-up to
  405. * the next page boundary and @phys is rounded-down to the previous page
  406. * boundary.
  407. *
  408. * If device attributes are not explicitly requested in @prot, then the
  409. * mapping will be normal, cacheable. Attempts to install a new mapping
  410. * for a virtual address that is already mapped will be rejected with an
  411. * error and a WARN().
  412. *
  413. * Return: 0 on success, negative error code on failure.
  414. */
  415. int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
  416. enum kvm_pgtable_prot prot);
  417. /**
  418. * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
  419. * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
  420. * @addr: Virtual address from which to remove the mapping.
  421. * @size: Size of the mapping.
  422. *
  423. * The offset of @addr within a page is ignored and @size is rounded-up to
  424. * the next page boundary.
  426. *
  427. * TLB invalidation is performed for each page-table entry cleared during the
  428. * unmapping operation and the reference count for the page-table page
  429. * containing the cleared entry is decremented, with unreferenced pages being
  430. * freed. The unmapping operation will stop early if it encounters either an
  431. * invalid page-table entry or a valid block mapping which maps beyond the range
  432. * being unmapped.
  433. *
  434. * Return: Number of bytes unmapped, which may be 0.
  435. */
  436. u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
  437. /**
  438. * kvm_get_vtcr() - Helper to construct VTCR_EL2
  439. * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
  440. * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
  441. * @phys_shift: Value to set in VTCR_EL2.T0SZ.
  442. *
  443. * The VTCR value is common across all the physical CPUs on the system.
  444. * We use system wide sanitised values to fill in different fields,
  445. * except for Hardware Management of Access Flags. HA Flag is set
  446. * unconditionally on all CPUs, as it is safe to run with or without
  447. * the feature and the bit is RES0 on CPUs that don't support it.
  448. *
  449. * Return: VTCR_EL2 value
  450. */
  451. u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
  452. /**
  453. * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
  454. * @vtcr: Content of the VTCR register.
  455. *
  456. * Return: the size (in bytes) of the stage-2 PGD
  457. */
  458. size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
  459. /**
  460. * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  461. * @pgt: Uninitialised page-table structure to initialise.
  462. * @mmu: S2 MMU context for this S2 translation
  463. * @mm_ops: Memory management callbacks.
  464. * @flags: Stage-2 configuration flags.
  465. * @force_pte_cb: Function that returns true if page level mappings must
  466. * be used instead of block mappings.
  467. *
  468. * Return: 0 on success, negative error code on failure.
  469. */
  470. int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
  471. struct kvm_pgtable_mm_ops *mm_ops,
  472. enum kvm_pgtable_stage2_flags flags,
  473. kvm_pgtable_force_pte_cb_t force_pte_cb);
  474. static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
  475. struct kvm_pgtable_mm_ops *mm_ops)
  476. {
  477. return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
  478. }
  479. /**
  480. * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
  481. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  482. *
  483. * The page-table is assumed to be unreachable by any hardware walkers prior
  484. * to freeing and therefore no TLB invalidation is performed.
  485. */
  486. void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
  487. /**
  488. * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
  489. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  490. * @addr: Intermediate physical address at which the range to destroy starts.
  491. * @size: Size of the range.
  492. *
  493. * The page-table is assumed to be unreachable by any hardware walkers prior
  494. * to freeing and therefore no TLB invalidation is performed.
  495. */
  496. void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
  497. u64 addr, u64 size);
  498. /**
  499. * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
  500. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  501. *
  502. * It is assumed that the rest of the page-table is freed before this operation.
  503. */
  504. void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
  505. /**
  506. * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
  507. * @mm_ops: Memory management callbacks.
  508. * @pgtable: Unlinked stage-2 paging structure to be freed.
  509. * @level: Level of the stage-2 paging structure to be freed.
  510. *
  511. * The page-table is assumed to be unreachable by any hardware walkers prior to
  512. * freeing and therefore no TLB invalidation is performed.
  513. */
  514. void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
  515. /**
  516. * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
  517. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  518. * @phys: Physical address of the memory to map.
  519. * @level: Starting level of the stage-2 paging structure to be created.
  520. * @prot: Permissions and attributes for the mapping.
  521. * @mc: Cache of pre-allocated and zeroed memory from which to allocate
  522. * page-table pages.
  523. * @force_pte: Force mappings to PAGE_SIZE granularity.
  524. *
  525. * Returns an unlinked page-table tree. This new page-table tree is
  526. * not reachable (i.e., it is unlinked) from the root pgd and it's
  527. * therefore unreachable by the hardware page-table walker. No TLB
  528. * invalidation or CMOs are performed.
  529. *
  530. * If device attributes are not explicitly requested in @prot, then the
  531. * mapping will be normal, cacheable.
  532. *
  533. * Return: The fully populated (unlinked) stage-2 paging structure, or
  534. * an ERR_PTR(error) on failure.
  535. */
  536. kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
  537. u64 phys, s8 level,
  538. enum kvm_pgtable_prot prot,
  539. void *mc, bool force_pte);
  540. /**
  541. * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
  542. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  543. * @addr: Intermediate physical address at which to place the mapping.
  544. * @size: Size of the mapping.
  545. * @phys: Physical address of the memory to map.
  546. * @prot: Permissions and attributes for the mapping.
  547. * @mc: Cache of pre-allocated and zeroed memory from which to allocate
  548. * page-table pages.
  549. * @flags: Flags to control the page-table walk (ex. a shared walk)
  550. *
  551. * The offset of @addr within a page is ignored, @size is rounded-up to
  552. * the next page boundary and @phys is rounded-down to the previous page
  553. * boundary.
  554. *
  555. * If device attributes are not explicitly requested in @prot, then the
  556. * mapping will be normal, cacheable.
  557. *
  558. * Note that the update of a valid leaf PTE in this function will be aborted,
  559. * if it's trying to recreate the exact same mapping or only change the access
  560. * permissions. Instead, the vCPU will exit one more time from guest if still
  561. * needed and then go through the path of relaxing permissions.
  562. *
  563. * Note that this function will both coalesce existing table entries and split
  564. * existing block mappings, relying on page-faults to fault back areas outside
  565. * of the new mapping lazily.
  566. *
  567. * Return: 0 on success, negative error code on failure.
  568. */
  569. int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
  570. u64 phys, enum kvm_pgtable_prot prot,
  571. void *mc, enum kvm_pgtable_walk_flags flags);
  572. /**
  573. * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
  574. * track ownership.
  575. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  576. * @addr: Base intermediate physical address to annotate.
  577. * @size: Size of the annotated range.
  578. * @mc: Cache of pre-allocated and zeroed memory from which to allocate
  579. * page-table pages.
  580. * @owner_id: Unique identifier for the owner of the page.
  581. *
  582. * By default, all page-tables are owned by identifier 0. This function can be
  583. * used to mark portions of the IPA space as owned by other entities. When a
  584. * stage 2 is used with identity-mappings, these annotations allow the
  585. * page-table data structure to be used as a simple rmap.
  586. *
  587. * Return: 0 on success, negative error code on failure.
  588. */
  589. int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
  590. void *mc, u8 owner_id);
  591. /**
  592. * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
  593. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  594. * @addr: Intermediate physical address from which to remove the mapping.
  595. * @size: Size of the mapping.
  596. *
  597. * The offset of @addr within a page is ignored and @size is rounded-up to
  598. * the next page boundary.
  599. *
  600. * TLB invalidation is performed for each page-table entry cleared during the
  601. * unmapping operation and the reference count for the page-table page
  602. * containing the cleared entry is decremented, with unreferenced pages being
  603. * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
  604. * FWB is not supported by the CPU.
  605. *
  606. * Return: 0 on success, negative error code on failure.
  607. */
  608. int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
  609. /**
  610. * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
  611. * without TLB invalidation.
  612. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  613. * @addr: Intermediate physical address from which to write-protect.
  614. * @size: Size of the range.
  615. *
  616. * The offset of @addr within a page is ignored and @size is rounded-up to
  617. * the next page boundary.
  618. *
  619. * Note that it is the caller's responsibility to invalidate the TLB after
  620. * calling this function to ensure that the updated permissions are visible
  621. * to the CPUs.
  622. *
  623. * Return: 0 on success, negative error code on failure.
  624. */
  625. int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
  626. /**
  627. * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
  628. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  629. * @addr: Intermediate physical address to identify the page-table entry.
  630. * @flags: Flags to control the page-table walk (ex. a shared walk)
  631. *
  632. * The offset of @addr within a page is ignored.
  633. *
  634. * If there is a valid, leaf page-table entry used to translate @addr, then
  635. * set the access flag in that entry.
  636. */
  637. void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
  638. enum kvm_pgtable_walk_flags flags);
  639. /**
  640. * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
  641. * flag in a page-table entry.
  642. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  643. * @addr: Intermediate physical address to identify the page-table entry.
  644. * @size: Size of the address range to visit.
  645. * @mkold: True if the access flag should be cleared.
  646. *
  647. * The offset of @addr within a page is ignored.
  648. *
  649. * Tests and conditionally clears the access flag for every valid, leaf
  650. * page-table entry used to translate the range [@addr, @addr + @size).
  651. *
  652. * Note that it is the caller's responsibility to invalidate the TLB after
  653. * calling this function to ensure that the updated permissions are visible
  654. * to the CPUs.
  655. *
  656. * Return: True if any of the visited PTEs had the access flag set.
  657. */
  658. bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
  659. u64 size, bool mkold);
  660. /**
  661. * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
  662. * page-table entry.
  663. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  664. * @addr: Intermediate physical address to identify the page-table entry.
  665. * @prot: Additional permissions to grant for the mapping.
  666. * @flags: Flags to control the page-table walk (e.g. a shared walk).
  667. *
  668. * The offset of @addr within a page is ignored.
  669. *
  670. * If there is a valid, leaf page-table entry used to translate @addr, then
  671. * relax the permissions in that entry according to the read, write and
  672. * execute permissions specified by @prot. No permissions are removed, and
  673. * TLB invalidation is performed after updating the entry. Software bits cannot
  674. * be set or cleared using kvm_pgtable_stage2_relax_perms().
  675. *
  676. * Return: 0 on success, negative error code on failure.
  677. */
  678. int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
  679. enum kvm_pgtable_prot prot,
  680. enum kvm_pgtable_walk_flags flags);
  681. /**
  682. * kvm_pgtable_stage2_flush() - Clean and invalidate the data cache to the
  683. * Point of Coherency for a guest stage-2
  684. * address range.
  685. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  686. * @addr: Intermediate physical address from which to flush.
  687. * @size: Size of the range.
  688. *
  689. * The offset of @addr within a page is ignored and @size is rounded-up to
  690. * the next page boundary.
  691. *
  692. * Return: 0 on success, negative error code on failure.
  693. */
  694. int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
  695. /**
  696. * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
  697. * to PAGE_SIZE guest pages.
  698. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
  699. * @addr: Intermediate physical address from which to split.
  700. * @size: Size of the range.
  701. * @mc: Cache of pre-allocated and zeroed memory from which to allocate
  702. * page-table pages.
  703. *
  704. * The function tries to split any level 1 or 2 entry that overlaps
  705. * with the input range (given by @addr and @size).
  706. *
  707. * Return: 0 on success, negative error code on failure. Note that
  708. * kvm_pgtable_stage2_split() is best effort: it tries to break as many
  709. * blocks in the input range as allowed by the capacity of @mc.
  710. */
  711. int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
  712. struct kvm_mmu_memory_cache *mc);
  713. /**
  714. * kvm_pgtable_walk() - Walk a page-table.
  715. * @pgt: Page-table structure initialised by kvm_pgtable_*_init().
  716. * @addr: Input address for the start of the walk.
  717. * @size: Size of the range to walk.
  718. * @walker: Walker callback description.
  719. *
  720. * The offset of @addr within a page is ignored and @size is rounded-up to
  721. * the next page boundary.
  722. *
  723. * The walker will walk the page-table entries corresponding to the input
  724. * address range specified, visiting entries according to the walker flags.
  725. * Invalid entries are treated as leaf entries. The visited page-table entry is
  726. * reloaded after invoking the walker callback, allowing the walker to descend
  727. * into a newly installed table.
  728. *
  729. * Returning a negative error code from the walker callback function will
  730. * terminate the walk immediately with the same error code.
  731. *
  732. * Return: 0 on success, negative error code on failure.
  733. */
  734. int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
  735. struct kvm_pgtable_walker *walker);
  736. /**
  737. * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
  738. * with its level.
  739. * @pgt: Page-table structure initialised by kvm_pgtable_*_init()
  740. * or a similar initialiser.
  741. * @addr: Input address for the start of the walk.
  742. * @ptep: Pointer to storage for the retrieved PTE.
  743. * @level: Pointer to storage for the level of the retrieved PTE.
  744. *
  745. * The offset of @addr within a page is ignored.
  746. *
  747. * The walker will walk the page-table entries corresponding to the input
  748. * address specified, retrieving the leaf entry corresponding to this address.
  749. * Invalid entries are treated as leaf entries.
  750. *
  751. * Return: 0 on success, negative error code on failure.
  752. */
  753. int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
  754. kvm_pte_t *ptep, s8 *level);
  755. /**
  756. * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
  757. * stage-2 Page-Table Entry.
  758. * @pte: Page-table entry.
  759. *
  760. * Return: Protection attributes of the page-table entry in the enum
  761. * kvm_pgtable_prot format.
  762. */
  763. enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  764. /**
  765. * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
  766. * Page-Table Entry.
  767. * @pte: Page-table entry.
  768. *
  769. * Return: Protection attributes of the page-table entry in the enum
  770. * kvm_pgtable_prot format.
  771. */
  772. enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
  773. /**
  774. * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries.
  775. *
  776. * @mmu: Stage-2 KVM MMU struct.
  777. * @addr: The base intermediate physical address from which to invalidate.
  778. * @size: Size of the range from the base to invalidate.
  779. */
  780. void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
  781. phys_addr_t addr, size_t size);
  782. #endif /* __ARM64_KVM_PGTABLE_H__ */