/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, Microsoft Corporation.
 */
  5. #ifndef _MSHV_ROOT_H_
  6. #define _MSHV_ROOT_H_
  7. #include <linux/spinlock.h>
  8. #include <linux/mutex.h>
  9. #include <linux/semaphore.h>
  10. #include <linux/sched.h>
  11. #include <linux/srcu.h>
  12. #include <linux/wait.h>
  13. #include <linux/hashtable.h>
  14. #include <linux/dev_printk.h>
  15. #include <linux/build_bug.h>
  16. #include <linux/mmu_notifier.h>
  17. #include <uapi/linux/mshv.h>
  18. /*
  19. * Hypervisor must be between these version numbers (inclusive)
  20. * to guarantee compatibility
  21. */
  22. #define MSHV_HV_MIN_VERSION (27744)
  23. #define MSHV_HV_MAX_VERSION (27751)
  24. static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE);
  25. #define MSHV_MAX_VPS 256
  26. #define MSHV_PARTITIONS_HASH_BITS 9
  27. #define MSHV_PIN_PAGES_BATCH_SIZE (0x10000000ULL / HV_HYP_PAGE_SIZE)
  28. struct mshv_vp {
  29. u32 vp_index;
  30. struct mshv_partition *vp_partition;
  31. struct mutex vp_mutex;
  32. struct hv_vp_register_page *vp_register_page;
  33. struct hv_message *vp_intercept_msg_page;
  34. void *vp_ghcb_page;
  35. struct hv_stats_page *vp_stats_pages[2];
  36. struct {
  37. atomic64_t vp_signaled_count;
  38. struct {
  39. u64 intercept_suspend: 1;
  40. u64 root_sched_blocked: 1; /* root scheduler only */
  41. u64 root_sched_dispatched: 1; /* root scheduler only */
  42. u64 reserved: 61;
  43. } flags;
  44. unsigned int kicked_by_hv;
  45. wait_queue_head_t vp_suspend_queue;
  46. } run;
  47. #if IS_ENABLED(CONFIG_DEBUG_FS)
  48. struct dentry *vp_stats_dentry;
  49. #endif
  50. };
  51. #define vp_fmt(fmt) "p%lluvp%u: " fmt
  52. #define vp_devprintk(level, v, fmt, ...) \
  53. do { \
  54. const struct mshv_vp *__vp = (v); \
  55. const struct mshv_partition *__pt = __vp->vp_partition; \
  56. dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \
  57. __vp->vp_index, ##__VA_ARGS__); \
  58. } while (0)
  59. #define vp_emerg(v, fmt, ...) vp_devprintk(emerg, v, fmt, ##__VA_ARGS__)
  60. #define vp_crit(v, fmt, ...) vp_devprintk(crit, v, fmt, ##__VA_ARGS__)
  61. #define vp_alert(v, fmt, ...) vp_devprintk(alert, v, fmt, ##__VA_ARGS__)
  62. #define vp_err(v, fmt, ...) vp_devprintk(err, v, fmt, ##__VA_ARGS__)
  63. #define vp_warn(v, fmt, ...) vp_devprintk(warn, v, fmt, ##__VA_ARGS__)
  64. #define vp_notice(v, fmt, ...) vp_devprintk(notice, v, fmt, ##__VA_ARGS__)
  65. #define vp_info(v, fmt, ...) vp_devprintk(info, v, fmt, ##__VA_ARGS__)
  66. #define vp_dbg(v, fmt, ...) vp_devprintk(dbg, v, fmt, ##__VA_ARGS__)
  67. enum mshv_region_type {
  68. MSHV_REGION_TYPE_MEM_PINNED,
  69. MSHV_REGION_TYPE_MEM_MOVABLE,
  70. MSHV_REGION_TYPE_MMIO
  71. };
  72. struct mshv_mem_region {
  73. struct hlist_node hnode;
  74. struct kref mreg_refcount;
  75. u64 nr_pages;
  76. u64 start_gfn;
  77. u64 start_uaddr;
  78. u32 hv_map_flags;
  79. struct mshv_partition *partition;
  80. enum mshv_region_type mreg_type;
  81. struct mmu_interval_notifier mreg_mni;
  82. struct mutex mreg_mutex; /* protects region pages remapping */
  83. struct page *mreg_pages[];
  84. };
  85. struct mshv_irq_ack_notifier {
  86. struct hlist_node link;
  87. unsigned int irq_ack_gsi;
  88. void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
  89. };
  90. struct mshv_partition {
  91. struct device *pt_module_dev;
  92. struct hlist_node pt_hnode;
  93. u64 pt_id;
  94. refcount_t pt_ref_count;
  95. struct mutex pt_mutex;
  96. spinlock_t pt_mem_regions_lock;
  97. struct hlist_head pt_mem_regions; // not ordered
  98. u32 pt_vp_count;
  99. struct mshv_vp *pt_vp_array[MSHV_MAX_VPS];
  100. struct mutex pt_irq_lock;
  101. struct srcu_struct pt_irq_srcu;
  102. struct hlist_head irq_ack_notifier_list;
  103. struct hlist_head pt_devices;
  104. /*
  105. * MSHV does not support more than one async hypercall in flight
  106. * for a single partition. Thus, it is okay to define per partition
  107. * async hypercall status.
  108. */
  109. struct completion async_hypercall;
  110. u64 async_hypercall_status;
  111. spinlock_t pt_irqfds_lock;
  112. struct hlist_head pt_irqfds_list;
  113. struct mutex irqfds_resampler_lock;
  114. struct hlist_head irqfds_resampler_list;
  115. struct hlist_head ioeventfds_list;
  116. struct mshv_girq_routing_table __rcu *pt_girq_tbl;
  117. u64 isolation_type;
  118. bool import_completed;
  119. bool pt_initialized;
  120. #if IS_ENABLED(CONFIG_DEBUG_FS)
  121. struct dentry *pt_stats_dentry;
  122. struct dentry *pt_vp_dentry;
  123. #endif
  124. };
  125. #define pt_fmt(fmt) "p%llu: " fmt
  126. #define pt_devprintk(level, p, fmt, ...) \
  127. do { \
  128. const struct mshv_partition *__pt = (p); \
  129. dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \
  130. ##__VA_ARGS__); \
  131. } while (0)
  132. #define pt_emerg(p, fmt, ...) pt_devprintk(emerg, p, fmt, ##__VA_ARGS__)
  133. #define pt_crit(p, fmt, ...) pt_devprintk(crit, p, fmt, ##__VA_ARGS__)
  134. #define pt_alert(p, fmt, ...) pt_devprintk(alert, p, fmt, ##__VA_ARGS__)
  135. #define pt_err(p, fmt, ...) pt_devprintk(err, p, fmt, ##__VA_ARGS__)
  136. #define pt_warn(p, fmt, ...) pt_devprintk(warn, p, fmt, ##__VA_ARGS__)
  137. #define pt_notice(p, fmt, ...) pt_devprintk(notice, p, fmt, ##__VA_ARGS__)
  138. #define pt_info(p, fmt, ...) pt_devprintk(info, p, fmt, ##__VA_ARGS__)
  139. #define pt_dbg(p, fmt, ...) pt_devprintk(dbg, p, fmt, ##__VA_ARGS__)
  140. struct mshv_lapic_irq {
  141. u32 lapic_vector;
  142. u64 lapic_apic_id;
  143. union hv_interrupt_control lapic_control;
  144. };
  145. #define MSHV_MAX_GUEST_IRQS 4096
  146. /* representation of one guest irq entry, either msi or legacy */
  147. struct mshv_guest_irq_ent {
  148. u32 girq_entry_valid; /* vfio looks at this */
  149. u32 guest_irq_num; /* a unique number for each irq */
  150. u32 girq_addr_lo; /* guest irq msi address info */
  151. u32 girq_addr_hi;
  152. u32 girq_irq_data; /* idt vector in some cases */
  153. };
  154. struct mshv_girq_routing_table {
  155. u32 num_rt_entries;
  156. struct mshv_guest_irq_ent mshv_girq_info_tbl[];
  157. };
  158. struct hv_synic_pages {
  159. struct hv_message_page *hyp_synic_message_page;
  160. struct hv_synic_event_flags_page *synic_event_flags_page;
  161. struct hv_synic_event_ring_page *synic_event_ring_page;
  162. };
  163. struct mshv_root {
  164. spinlock_t pt_ht_lock;
  165. DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS);
  166. struct hv_partition_property_vmm_capabilities vmm_caps;
  167. };
  168. /*
  169. * Callback for doorbell events.
  170. * NOTE: This is called in interrupt context. Callback
  171. * should defer slow and sleeping logic to later.
  172. */
  173. typedef void (*doorbell_cb_t) (int doorbell_id, void *);
  174. /*
  175. * port table information
  176. */
  177. struct port_table_info {
  178. struct rcu_head portbl_rcu;
  179. enum hv_port_type hv_port_type;
  180. union {
  181. struct {
  182. u64 reserved[2];
  183. } hv_port_message;
  184. struct {
  185. u64 reserved[2];
  186. } hv_port_event;
  187. struct {
  188. u64 reserved[2];
  189. } hv_port_monitor;
  190. struct {
  191. doorbell_cb_t doorbell_cb;
  192. void *data;
  193. } hv_port_doorbell;
  194. };
  195. };
  196. int mshv_update_routing_table(struct mshv_partition *partition,
  197. const struct mshv_user_irq_entry *entries,
  198. unsigned int numents);
  199. void mshv_free_routing_table(struct mshv_partition *partition);
  200. struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition,
  201. u32 irq_num);
  202. void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq,
  203. struct mshv_lapic_irq *dest_irq);
  204. void mshv_irqfd_routing_update(struct mshv_partition *partition);
  205. void mshv_port_table_fini(void);
  206. int mshv_portid_alloc(struct port_table_info *info);
  207. int mshv_portid_lookup(int port_id, struct port_table_info *info);
  208. void mshv_portid_free(int port_id);
  209. int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
  210. void *data, u64 gpa, u64 val, u64 flags);
  211. void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid);
  212. void mshv_isr(void);
  213. int mshv_synic_init(struct device *dev);
  214. void mshv_synic_exit(void);
  215. static inline bool mshv_partition_encrypted(struct mshv_partition *partition)
  216. {
  217. return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP;
  218. }
  219. struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
  220. void mshv_partition_put(struct mshv_partition *partition);
  221. struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);
  222. static inline bool is_l1vh_parent(u64 partition_id)
  223. {
  224. return hv_l1vh_partition() && (partition_id == HV_PARTITION_ID_SELF);
  225. }
  226. int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
  227. struct hv_stats_page **stats_pages);
  228. void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
  229. struct hv_stats_page **stats_pages);
  230. /* hypercalls */
  231. int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
  232. int hv_call_create_partition(u64 flags,
  233. struct hv_partition_creation_properties creation_properties,
  234. union hv_partition_isolation_properties isolation_properties,
  235. u64 *partition_id);
  236. int hv_call_initialize_partition(u64 partition_id);
  237. int hv_call_finalize_partition(u64 partition_id);
  238. int hv_call_delete_partition(u64 partition_id);
  239. int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs);
  240. int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
  241. u32 flags, struct page **pages);
  242. int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
  243. u32 flags);
  244. int hv_call_delete_vp(u64 partition_id, u32 vp_index);
  245. int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
  246. u64 dest_addr,
  247. union hv_interrupt_control control);
  248. int hv_call_clear_virtual_interrupt(u64 partition_id);
  249. int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
  250. union hv_gpa_page_access_state_flags state_flags,
  251. int *written_total,
  252. union hv_gpa_page_access_state *states);
  253. int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
  254. struct hv_vp_state_data state_data,
  255. /* Choose between pages and ret_output */
  256. u64 page_count, struct page **pages,
  257. union hv_output_get_vp_state *ret_output);
  258. int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
  259. /* Choose between pages and bytes */
  260. struct hv_vp_state_data state_data, u64 page_count,
  261. struct page **pages, u32 num_bytes, u8 *bytes);
  262. int hv_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
  263. union hv_input_vtl input_vtl,
  264. struct page **state_page);
  265. int hv_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
  266. struct page *state_page,
  267. union hv_input_vtl input_vtl);
  268. int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
  269. u64 connection_partition_id, struct hv_port_info *port_info,
  270. u8 port_vtl, u8 min_connection_vtl, int node);
  271. int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
  272. int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
  273. u64 connection_partition_id,
  274. union hv_connection_id connection_id,
  275. struct hv_connection_info *connection_info,
  276. u8 connection_vtl, int node);
  277. int hv_call_disconnect_port(u64 connection_partition_id,
  278. union hv_connection_id connection_id);
  279. int hv_call_notify_port_ring_empty(u32 sint_index);
  280. int hv_map_stats_page(enum hv_stats_object_type type,
  281. const union hv_stats_object_identity *identity,
  282. struct hv_stats_page **addr);
  283. int hv_unmap_stats_page(enum hv_stats_object_type type,
  284. struct hv_stats_page *page_addr,
  285. const union hv_stats_object_identity *identity);
  286. int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
  287. u64 page_struct_count, u32 host_access,
  288. u32 flags, u8 acquire);
  289. int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
  290. void *property_value, size_t property_value_sz);
  291. #if IS_ENABLED(CONFIG_DEBUG_FS)
  292. int __init mshv_debugfs_init(void);
  293. void mshv_debugfs_exit(void);
  294. int mshv_debugfs_partition_create(struct mshv_partition *partition);
  295. void mshv_debugfs_partition_remove(struct mshv_partition *partition);
  296. int mshv_debugfs_vp_create(struct mshv_vp *vp);
  297. void mshv_debugfs_vp_remove(struct mshv_vp *vp);
  298. #else
  299. static inline int __init mshv_debugfs_init(void)
  300. {
  301. return 0;
  302. }
  303. static inline void mshv_debugfs_exit(void) { }
  304. static inline int mshv_debugfs_partition_create(struct mshv_partition *partition)
  305. {
  306. return 0;
  307. }
  308. static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { }
  309. static inline int mshv_debugfs_vp_create(struct mshv_vp *vp)
  310. {
  311. return 0;
  312. }
  313. static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { }
  314. #endif
  315. extern struct mshv_root mshv_root;
  316. extern enum hv_scheduler_type hv_scheduler_type;
  317. extern u8 * __percpu *hv_synic_eventring_tail;
  318. struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
  319. u64 uaddr, u32 flags);
  320. int mshv_region_share(struct mshv_mem_region *region);
  321. int mshv_region_unshare(struct mshv_mem_region *region);
  322. int mshv_region_map(struct mshv_mem_region *region);
  323. void mshv_region_invalidate(struct mshv_mem_region *region);
  324. int mshv_region_pin(struct mshv_mem_region *region);
  325. void mshv_region_put(struct mshv_mem_region *region);
  326. int mshv_region_get(struct mshv_mem_region *region);
  327. bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
  328. void mshv_region_movable_fini(struct mshv_mem_region *region);
  329. bool mshv_region_movable_init(struct mshv_mem_region *region);
  330. #endif /* _MSHV_ROOT_H_ */