hyperv_vmbus.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /*
  3. *
  4. * Copyright (c) 2011, Microsoft Corporation.
  5. *
  6. * Authors:
  7. * Haiyang Zhang <haiyangz@microsoft.com>
  8. * Hank Janssen <hjanssen@microsoft.com>
  9. * K. Y. Srinivasan <kys@microsoft.com>
  10. */
  11. #ifndef _HYPERV_VMBUS_H
  12. #define _HYPERV_VMBUS_H
  13. #include <linux/list.h>
  14. #include <linux/bitops.h>
  15. #include <asm/sync_bitops.h>
  16. #include <asm/mshyperv.h>
  17. #include <linux/atomic.h>
  18. #include <linux/hyperv.h>
  19. #include <linux/interrupt.h>
  20. #include <hyperv/hvhdk.h>
  21. #include "hv_trace.h"
  22. /*
  23. * Timeout for services such as KVP and fcopy.
  24. */
  25. #define HV_UTIL_TIMEOUT 30
  26. /*
  27. * Timeout for guest-host handshake for services.
  28. */
  29. #define HV_UTIL_NEGO_TIMEOUT 55
  30. void vmbus_isr(void);
  31. /* Definitions for the monitored notification facility */
  32. union hv_monitor_trigger_group {
  33. u64 as_uint64;
  34. struct {
  35. u32 pending;
  36. u32 armed;
  37. };
  38. };
  39. struct hv_monitor_parameter {
  40. union hv_connection_id connectionid;
  41. u16 flagnumber;
  42. u16 rsvdz;
  43. };
  44. union hv_monitor_trigger_state {
  45. u32 asu32;
  46. struct {
  47. u32 group_enable:4;
  48. u32 rsvdz:28;
  49. };
  50. };
  51. /* struct hv_monitor_page Layout */
  52. /* ------------------------------------------------------ */
  53. /* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */
  54. /* | 8 | TriggerGroup[0] | */
  55. /* | 10 | TriggerGroup[1] | */
  56. /* | 18 | TriggerGroup[2] | */
  57. /* | 20 | TriggerGroup[3] | */
  58. /* | 28 | Rsvd2[0] | */
  59. /* | 30 | Rsvd2[1] | */
  60. /* | 38 | Rsvd2[2] | */
  61. /* | 40 | NextCheckTime[0][0] | NextCheckTime[0][1] | */
  62. /* | ... | */
  63. /* | 240 | Latency[0][0..3] | */
  64. /* | 340 | Rsvz3[0] | */
  65. /* | 440 | Parameter[0][0] | */
  66. /* | 448 | Parameter[0][1] | */
  67. /* | ... | */
  68. /* | 840 | Rsvd4[0] | */
  69. /* ------------------------------------------------------ */
  70. struct hv_monitor_page {
  71. union hv_monitor_trigger_state trigger_state;
  72. u32 rsvdz1;
  73. union hv_monitor_trigger_group trigger_group[4];
  74. u64 rsvdz2[3];
  75. s32 next_checktime[4][32];
  76. u16 latency[4][32];
  77. u64 rsvdz3[32];
  78. struct hv_monitor_parameter parameter[4][32];
  79. u8 rsvdz4[1984];
  80. };
  81. #define HV_HYPERCALL_PARAM_ALIGN sizeof(u64)
  82. /* Definition of the hv_post_message hypercall input structure. */
  83. struct hv_input_post_message {
  84. union hv_connection_id connectionid;
  85. u32 reserved;
  86. u32 message_type;
  87. u32 payload_size;
  88. u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
  89. };
  90. enum {
  91. VMBUS_MESSAGE_CONNECTION_ID = 1,
  92. VMBUS_MESSAGE_CONNECTION_ID_4 = 4,
  93. VMBUS_MESSAGE_PORT_ID = 1,
  94. VMBUS_EVENT_CONNECTION_ID = 2,
  95. VMBUS_EVENT_PORT_ID = 2,
  96. VMBUS_MONITOR_CONNECTION_ID = 3,
  97. VMBUS_MONITOR_PORT_ID = 3,
  98. VMBUS_MESSAGE_SINT = 2,
  99. };
  100. /*
  101. * Per cpu state for channel handling
  102. */
  103. struct hv_per_cpu_context {
  104. /*
  105. * SynIC pages for communicating with the host.
  106. *
  107. * These pages are accessible to the host partition and the hypervisor.
  108. * They may be used for exchanging data with the host partition and the
  109. * hypervisor even when they aren't trusted yet the guest partition
  110. * must be prepared to handle the malicious behavior.
  111. */
  112. void *hyp_synic_message_page;
  113. void *hyp_synic_event_page;
  114. /*
  115. * SynIC pages for communicating with the paravisor.
  116. *
  117. * These pages may be accessed from within the guest partition only in
  118. * CoCo VMs. Neither the host partition nor the hypervisor can access
  119. * these pages in that case; they are used for exchanging data with the
  120. * paravisor.
  121. */
  122. void *para_synic_message_page;
  123. void *para_synic_event_page;
  124. /*
  125. * The page is only used in hv_post_message() for a TDX VM (with the
  126. * paravisor) to post a messages to Hyper-V: when such a VM calls
  127. * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which
  128. * is encrypted in such a VM) as the hypercall input page, because
  129. * the input page for HVCALL_POST_MESSAGE must be decrypted in such a
  130. * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is
  131. * introduced for this purpose. See hyperv_init() for more comments.
  132. */
  133. void *post_msg_page;
  134. /*
  135. * Starting with win8, we can take channel interrupts on any CPU;
  136. * we will manage the tasklet that handles events messages on a per CPU
  137. * basis.
  138. */
  139. struct tasklet_struct msg_dpc;
  140. };
  141. struct hv_context {
  142. /* We only support running on top of Hyper-V
  143. * So at this point this really can only contain the Hyper-V ID
  144. */
  145. u64 guestid;
  146. struct hv_per_cpu_context __percpu *cpu_context;
  147. /*
  148. * To manage allocations in a NUMA node.
  149. * Array indexed by numa node ID.
  150. */
  151. struct cpumask *hv_numa_map;
  152. };
  153. extern struct hv_context hv_context;
  154. /* Hv Interface */
  155. extern int hv_init(void);
  156. extern int hv_post_message(union hv_connection_id connection_id,
  157. enum hv_message_type message_type,
  158. void *payload, size_t payload_size);
  159. extern int hv_synic_alloc(void);
  160. extern void hv_synic_free(void);
  161. extern void hv_hyp_synic_enable_regs(unsigned int cpu);
  162. extern int hv_synic_init(unsigned int cpu);
  163. extern void hv_hyp_synic_disable_regs(unsigned int cpu);
  164. extern int hv_synic_cleanup(unsigned int cpu);
  165. /* Interface */
  166. void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
  167. int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
  168. struct page *pages, u32 pagecnt, u32 max_pkt_size,
  169. bool confidential);
  170. void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
  171. int hv_ringbuffer_write(struct vmbus_channel *channel,
  172. const struct kvec *kv_list, u32 kv_count,
  173. u64 requestid, u64 *trans_id);
  174. int hv_ringbuffer_read(struct vmbus_channel *channel,
  175. void *buffer, u32 buflen, u32 *buffer_actual_len,
  176. u64 *requestid, bool raw);
  177. /*
  178. * The Maximum number of channels (16384) is determined by the size of the
  179. * interrupt page, which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to
  180. * send endpoint interrupts, and the other is to receive endpoint interrupts.
  181. */
  182. #define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3)
  183. /* The value here must be in multiple of 32 */
  184. #define MAX_NUM_CHANNELS_SUPPORTED 256
  185. #define MAX_CHANNEL_RELIDS \
  186. max(MAX_NUM_CHANNELS_SUPPORTED, HV_EVENT_FLAGS_COUNT)
  187. enum vmbus_connect_state {
  188. DISCONNECTED,
  189. CONNECTING,
  190. CONNECTED,
  191. DISCONNECTING
  192. };
  193. #define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
  194. /*
  195. * The CPU that Hyper-V will interrupt for VMBUS messages, such as
  196. * CHANNELMSG_OFFERCHANNEL and CHANNELMSG_RESCIND_CHANNELOFFER.
  197. */
  198. #define VMBUS_CONNECT_CPU 0
  199. struct vmbus_connection {
  200. u32 msg_conn_id;
  201. atomic_t offer_in_progress;
  202. enum vmbus_connect_state conn_state;
  203. atomic_t next_gpadl_handle;
  204. struct completion unload_event;
  205. /*
  206. * Represents channel interrupts. Each bit position represents a
  207. * channel. When a channel sends an interrupt via VMBUS, it finds its
  208. * bit in the sendInterruptPage, set it and calls Hv to generate a port
  209. * event. The other end receives the port event and parse the
  210. * recvInterruptPage to see which bit is set
  211. */
  212. void *int_page;
  213. void *send_int_page;
  214. void *recv_int_page;
  215. /*
  216. * 2 pages - 1st page for parent->child notification and 2nd
  217. * is child->parent notification
  218. */
  219. struct hv_monitor_page *monitor_pages[2];
  220. struct list_head chn_msg_list;
  221. spinlock_t channelmsg_lock;
  222. /* List of channels */
  223. struct list_head chn_list;
  224. struct mutex channel_mutex;
  225. /* Array of channels */
  226. struct vmbus_channel **channels;
  227. /*
  228. * An offer message is handled first on the work_queue, and then
  229. * is further handled on handle_primary_chan_wq or
  230. * handle_sub_chan_wq.
  231. */
  232. struct workqueue_struct *work_queue;
  233. struct workqueue_struct *handle_primary_chan_wq;
  234. struct workqueue_struct *handle_sub_chan_wq;
  235. struct workqueue_struct *rescind_work_queue;
  236. /*
  237. * On suspension of the vmbus, the accumulated offer messages
  238. * must be dropped.
  239. */
  240. bool ignore_any_offer_msg;
  241. /*
  242. * The number of sub-channels and hv_sock channels that should be
  243. * cleaned up upon suspend: sub-channels will be re-created upon
  244. * resume, and hv_sock channels should not survive suspend.
  245. */
  246. atomic_t nr_chan_close_on_suspend;
  247. /*
  248. * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
  249. * drop to zero.
  250. */
  251. struct completion ready_for_suspend_event;
  252. /*
  253. * Completed once the host has offered all boot-time channels.
  254. * Note that some channels may still be under process on a workqueue.
  255. */
  256. struct completion all_offers_delivered_event;
  257. };
  258. struct vmbus_msginfo {
  259. /* Bookkeeping stuff */
  260. struct list_head msglist_entry;
  261. /* The message itself */
  262. unsigned char msg[];
  263. };
  264. extern struct vmbus_connection vmbus_connection;
  265. int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
  266. static inline void vmbus_send_interrupt(u32 relid)
  267. {
  268. sync_set_bit(relid, vmbus_connection.send_int_page);
  269. }
  270. enum vmbus_message_handler_type {
  271. /* The related handler can sleep. */
  272. VMHT_BLOCKING = 0,
  273. /* The related handler must NOT sleep. */
  274. VMHT_NON_BLOCKING = 1,
  275. };
  276. struct vmbus_channel_message_table_entry {
  277. enum vmbus_channel_message_type message_type;
  278. enum vmbus_message_handler_type handler_type;
  279. void (*message_handler)(struct vmbus_channel_message_header *msg);
  280. u32 min_payload_len;
  281. };
  282. extern const struct vmbus_channel_message_table_entry
  283. channel_message_table[CHANNELMSG_COUNT];
  284. /* General vmbus interface */
  285. bool vmbus_is_confidential(void);
  286. #if IS_ENABLED(CONFIG_HYPERV_VMBUS)
  287. /* Free the message slot and signal end-of-message if required */
  288. static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
  289. {
  290. /*
  291. * On crash we're reading some other CPU's message page and we need
  292. * to be careful: this other CPU may already had cleared the header
  293. * and the host may already had delivered some other message there.
  294. * In case we blindly write msg->header.message_type we're going
  295. * to lose it. We can still lose a message of the same type but
  296. * we count on the fact that there can only be one
  297. * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
  298. * on crash.
  299. */
  300. if (!try_cmpxchg(&msg->header.message_type,
  301. &old_msg_type, HVMSG_NONE))
  302. return;
  303. /*
  304. * The cmpxchg() above does an implicit memory barrier to
  305. * ensure the write to MessageType (ie set to
  306. * HVMSG_NONE) happens before we read the
  307. * MessagePending and EOMing. Otherwise, the EOMing
  308. * will not deliver any more messages since there is
  309. * no empty slot
  310. */
  311. if (msg->header.message_flags.msg_pending) {
  312. /*
  313. * This will cause message queue rescan to
  314. * possibly deliver another msg from the
  315. * hypervisor
  316. */
  317. if (vmbus_is_confidential())
  318. hv_para_set_synic_register(HV_MSR_EOM, 0);
  319. else
  320. hv_set_msr(HV_MSR_EOM, 0);
  321. }
  322. }
  323. extern int vmbus_interrupt;
  324. extern int vmbus_irq;
  325. #endif /* CONFIG_HYPERV_VMBUS */
  326. struct hv_device *vmbus_device_create(const guid_t *type,
  327. const guid_t *instance,
  328. struct vmbus_channel *channel);
  329. int vmbus_device_register(struct hv_device *child_device_obj);
  330. void vmbus_device_unregister(struct hv_device *device_obj);
  331. int vmbus_add_channel_kobj(struct hv_device *device_obj,
  332. struct vmbus_channel *channel);
  333. void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);
  334. void vmbus_channel_map_relid(struct vmbus_channel *channel);
  335. void vmbus_channel_unmap_relid(struct vmbus_channel *channel);
  336. struct vmbus_channel *relid2channel(u32 relid);
  337. void vmbus_free_channels(void);
  338. /* Connection interface */
  339. int vmbus_connect(void);
  340. void vmbus_disconnect(void);
  341. int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep);
  342. void vmbus_on_event(unsigned long data);
  343. void vmbus_on_msg_dpc(unsigned long data);
  344. int hv_kvp_init(struct hv_util_service *srv);
  345. int hv_kvp_init_transport(void);
  346. void hv_kvp_deinit(void);
  347. int hv_kvp_pre_suspend(void);
  348. int hv_kvp_pre_resume(void);
  349. void hv_kvp_onchannelcallback(void *context);
  350. int hv_vss_init(struct hv_util_service *srv);
  351. int hv_vss_init_transport(void);
  352. void hv_vss_deinit(void);
  353. int hv_vss_pre_suspend(void);
  354. int hv_vss_pre_resume(void);
  355. void hv_vss_onchannelcallback(void *context);
  356. void vmbus_initiate_unload(bool crash);
  357. static inline void hv_poll_channel(struct vmbus_channel *channel,
  358. void (*cb)(void *))
  359. {
  360. if (!channel)
  361. return;
  362. cb(channel);
  363. }
  364. enum hvutil_device_state {
  365. HVUTIL_DEVICE_INIT = 0, /* driver is loaded, waiting for userspace */
  366. HVUTIL_READY, /* userspace is registered */
  367. HVUTIL_HOSTMSG_RECEIVED, /* message from the host was received */
  368. HVUTIL_USERSPACE_REQ, /* request to userspace was sent */
  369. HVUTIL_USERSPACE_RECV, /* reply from userspace was received */
  370. HVUTIL_DEVICE_DYING, /* driver unload is in progress */
  371. };
  372. enum delay {
  373. INTERRUPT_DELAY = 0,
  374. MESSAGE_DELAY = 1,
  375. };
  376. extern const struct vmbus_device vmbus_devs[];
  377. static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
  378. {
  379. return vmbus_devs[channel->device_id].perf_device;
  380. }
  381. static inline size_t hv_dev_ring_size(struct vmbus_channel *channel)
  382. {
  383. return vmbus_devs[channel->device_id].pref_ring_size;
  384. }
  385. static inline bool hv_is_allocated_cpu(unsigned int cpu)
  386. {
  387. struct vmbus_channel *channel, *sc;
  388. lockdep_assert_held(&vmbus_connection.channel_mutex);
  389. /*
  390. * List additions/deletions as well as updates of the target CPUs are
  391. * protected by channel_mutex.
  392. */
  393. list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
  394. if (!hv_is_perf_channel(channel))
  395. continue;
  396. if (channel->target_cpu == cpu)
  397. return true;
  398. list_for_each_entry(sc, &channel->sc_list, sc_list) {
  399. if (sc->target_cpu == cpu)
  400. return true;
  401. }
  402. }
  403. return false;
  404. }
  405. static inline void hv_set_allocated_cpu(unsigned int cpu)
  406. {
  407. cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
  408. }
  409. static inline void hv_clear_allocated_cpu(unsigned int cpu)
  410. {
  411. if (hv_is_allocated_cpu(cpu))
  412. return;
  413. cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
  414. }
  415. static inline void hv_update_allocated_cpus(unsigned int old_cpu,
  416. unsigned int new_cpu)
  417. {
  418. hv_set_allocated_cpu(new_cpu);
  419. hv_clear_allocated_cpu(old_cpu);
  420. }
  421. #ifdef CONFIG_HYPERV_TESTING
  422. int hv_debug_add_dev_dir(struct hv_device *dev);
  423. void hv_debug_rm_dev_dir(struct hv_device *dev);
  424. void hv_debug_rm_all_dir(void);
  425. int hv_debug_init(void);
  426. void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type);
  427. #else /* CONFIG_HYPERV_TESTING */
  428. static inline void hv_debug_rm_dev_dir(struct hv_device *dev) {};
  429. static inline void hv_debug_rm_all_dir(void) {};
  430. static inline void hv_debug_delay_test(struct vmbus_channel *channel,
  431. enum delay delay_type) {};
  432. static inline int hv_debug_init(void)
  433. {
  434. return -1;
  435. }
  436. static inline int hv_debug_add_dev_dir(struct hv_device *dev)
  437. {
  438. return -1;
  439. }
  440. #endif /* CONFIG_HYPERV_TESTING */
  441. /* Create and remove sysfs entry for memory mapped ring buffers for a channel */
  442. int hv_create_ring_sysfs(struct vmbus_channel *channel,
  443. int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
  444. struct vm_area_struct *vma));
  445. int hv_remove_ring_sysfs(struct vmbus_channel *channel);
  446. #endif /* _HYPERV_VMBUS_H */