connection.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. *
  4. * Copyright (c) 2009, Microsoft Corporation.
  5. *
  6. * Authors:
  7. * Haiyang Zhang <haiyangz@microsoft.com>
  8. * Hank Janssen <hjanssen@microsoft.com>
  9. */
  10. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11. #include <linux/kernel.h>
  12. #include <linux/sched.h>
  13. #include <linux/wait.h>
  14. #include <linux/delay.h>
  15. #include <linux/mm.h>
  16. #include <linux/module.h>
  17. #include <linux/slab.h>
  18. #include <linux/vmalloc.h>
  19. #include <linux/hyperv.h>
  20. #include <linux/export.h>
  21. #include <linux/io.h>
  22. #include <linux/set_memory.h>
  23. #include <asm/mshyperv.h>
  24. #include "hyperv_vmbus.h"
  25. struct vmbus_connection vmbus_connection = {
  26. .conn_state = DISCONNECTED,
  27. .unload_event = COMPLETION_INITIALIZER(
  28. vmbus_connection.unload_event),
  29. .next_gpadl_handle = ATOMIC_INIT(0xE1E10),
  30. .ready_for_suspend_event = COMPLETION_INITIALIZER(
  31. vmbus_connection.ready_for_suspend_event),
  32. .all_offers_delivered_event = COMPLETION_INITIALIZER(
  33. vmbus_connection.all_offers_delivered_event),
  34. };
  35. EXPORT_SYMBOL_GPL(vmbus_connection);
  36. /*
  37. * Negotiated protocol version with the host.
  38. */
  39. __u32 vmbus_proto_version;
  40. EXPORT_SYMBOL_GPL(vmbus_proto_version);
  41. /*
  42. * Table of VMBus versions listed from newest to oldest.
  43. * VERSION_WIN7 and VERSION_WS2008 are no longer supported in
  44. * Linux guests and are not listed.
  45. */
  46. static __u32 vmbus_versions[] = {
  47. VERSION_WIN10_V6_0,
  48. VERSION_WIN10_V5_3,
  49. VERSION_WIN10_V5_2,
  50. VERSION_WIN10_V5_1,
  51. VERSION_WIN10_V5,
  52. VERSION_WIN10_V4_1,
  53. VERSION_WIN10,
  54. VERSION_WIN8_1,
  55. VERSION_WIN8
  56. };
  57. /*
  58. * Maximal VMBus protocol version guests can negotiate. Useful to cap the
  59. * VMBus version for testing and debugging purpose.
  60. */
  61. static uint max_version = VERSION_WIN10_V6_0;
  62. module_param(max_version, uint, S_IRUGO);
  63. MODULE_PARM_DESC(max_version,
  64. "Maximal VMBus protocol version which can be negotiated");
  65. int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
  66. {
  67. int ret = 0;
  68. struct vmbus_channel_initiate_contact *msg;
  69. unsigned long flags;
  70. init_completion(&msginfo->waitevent);
  71. msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;
  72. memset(msg, 0, sizeof(*msg));
  73. msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
  74. msg->vmbus_version_requested = version;
  75. /*
  76. * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must
  77. * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
  78. * and for subsequent messages, we must use the Message Connection ID
  79. * field in the host-returned Version Response Message. And, with
  80. * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we
  81. * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
  82. * compatibility.
  83. *
  84. * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
  85. */
  86. if (version >= VERSION_WIN10_V5) {
  87. msg->msg_sint = VMBUS_MESSAGE_SINT;
  88. msg->msg_vtl = ms_hyperv.vtl;
  89. vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
  90. } else {
  91. msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
  92. vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
  93. }
  94. if (vmbus_is_confidential() && version >= VERSION_WIN10_V6_0)
  95. msg->feature_flags = VMBUS_FEATURE_FLAG_CONFIDENTIAL_CHANNELS;
  96. /*
  97. * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always
  98. * bitwise OR it
  99. */
  100. msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) |
  101. ms_hyperv.shared_gpa_boundary;
  102. msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) |
  103. ms_hyperv.shared_gpa_boundary;
  104. msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
  105. /*
  106. * Add to list before we send the request since we may
  107. * receive the response before returning from this routine
  108. */
  109. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  110. list_add_tail(&msginfo->msglistentry,
  111. &vmbus_connection.chn_msg_list);
  112. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  113. ret = vmbus_post_msg(msg,
  114. sizeof(struct vmbus_channel_initiate_contact),
  115. true);
  116. trace_vmbus_negotiate_version(msg, ret);
  117. if (ret != 0) {
  118. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  119. list_del(&msginfo->msglistentry);
  120. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
  121. flags);
  122. return ret;
  123. }
  124. /* Wait for the connection response */
  125. wait_for_completion(&msginfo->waitevent);
  126. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  127. list_del(&msginfo->msglistentry);
  128. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  129. /* Check if successful */
  130. if (msginfo->response.version_response.version_supported) {
  131. vmbus_connection.conn_state = CONNECTED;
  132. if (version >= VERSION_WIN10_V5)
  133. vmbus_connection.msg_conn_id =
  134. msginfo->response.version_response.msg_conn_id;
  135. } else {
  136. return -ECONNREFUSED;
  137. }
  138. return ret;
  139. }
  140. /*
  141. * vmbus_connect - Sends a connect request on the partition service connection
  142. */
  143. int vmbus_connect(void)
  144. {
  145. struct vmbus_channel_msginfo *msginfo = NULL;
  146. int i, ret = 0;
  147. __u32 version;
  148. /* Initialize the vmbus connection */
  149. vmbus_connection.conn_state = CONNECTING;
  150. vmbus_connection.work_queue = create_workqueue("hv_vmbus_con");
  151. if (!vmbus_connection.work_queue) {
  152. ret = -ENOMEM;
  153. goto cleanup;
  154. }
  155. vmbus_connection.rescind_work_queue =
  156. create_workqueue("hv_vmbus_rescind");
  157. if (!vmbus_connection.rescind_work_queue) {
  158. ret = -ENOMEM;
  159. goto cleanup;
  160. }
  161. vmbus_connection.ignore_any_offer_msg = false;
  162. vmbus_connection.handle_primary_chan_wq =
  163. create_workqueue("hv_pri_chan");
  164. if (!vmbus_connection.handle_primary_chan_wq) {
  165. ret = -ENOMEM;
  166. goto cleanup;
  167. }
  168. vmbus_connection.handle_sub_chan_wq =
  169. create_workqueue("hv_sub_chan");
  170. if (!vmbus_connection.handle_sub_chan_wq) {
  171. ret = -ENOMEM;
  172. goto cleanup;
  173. }
  174. INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
  175. spin_lock_init(&vmbus_connection.channelmsg_lock);
  176. INIT_LIST_HEAD(&vmbus_connection.chn_list);
  177. mutex_init(&vmbus_connection.channel_mutex);
  178. /*
  179. * The following Hyper-V interrupt and monitor pages can be used by
  180. * UIO for mapping to user-space, so they should always be allocated on
  181. * system page boundaries. The system page size must be >= the Hyper-V
  182. * page size.
  183. */
  184. BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
  185. /*
  186. * Setup the vmbus event connection for channel interrupt
  187. * abstraction stuff
  188. */
  189. vmbus_connection.int_page =
  190. (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  191. if (vmbus_connection.int_page == NULL) {
  192. ret = -ENOMEM;
  193. goto cleanup;
  194. }
  195. vmbus_connection.recv_int_page = vmbus_connection.int_page;
  196. vmbus_connection.send_int_page =
  197. (void *)((unsigned long)vmbus_connection.int_page +
  198. (HV_HYP_PAGE_SIZE >> 1));
  199. /*
  200. * Setup the monitor notification facility. The 1st page for
  201. * parent->child and the 2nd page for child->parent
  202. */
  203. vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL);
  204. vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL);
  205. if ((vmbus_connection.monitor_pages[0] == NULL) ||
  206. (vmbus_connection.monitor_pages[1] == NULL)) {
  207. ret = -ENOMEM;
  208. goto cleanup;
  209. }
  210. ret = set_memory_decrypted((unsigned long)
  211. vmbus_connection.monitor_pages[0], 1);
  212. ret |= set_memory_decrypted((unsigned long)
  213. vmbus_connection.monitor_pages[1], 1);
  214. if (ret) {
  215. /*
  216. * If set_memory_decrypted() fails, the encryption state
  217. * of the memory is unknown. So leak the memory instead
  218. * of risking returning decrypted memory to the free list.
  219. * For simplicity, always handle both pages the same.
  220. */
  221. vmbus_connection.monitor_pages[0] = NULL;
  222. vmbus_connection.monitor_pages[1] = NULL;
  223. goto cleanup;
  224. }
  225. /*
  226. * Set_memory_decrypted() will change the memory contents if
  227. * decryption occurs, so zero monitor pages here.
  228. */
  229. memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE);
  230. memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE);
  231. msginfo = kzalloc(sizeof(*msginfo) +
  232. sizeof(struct vmbus_channel_initiate_contact),
  233. GFP_KERNEL);
  234. if (msginfo == NULL) {
  235. ret = -ENOMEM;
  236. goto cleanup;
  237. }
  238. /*
  239. * Negotiate a compatible VMBUS version number with the
  240. * host. We start with the highest number we can support
  241. * and work our way down until we negotiate a compatible
  242. * version.
  243. */
  244. for (i = 0; ; i++) {
  245. if (i == ARRAY_SIZE(vmbus_versions)) {
  246. ret = -EDOM;
  247. goto cleanup;
  248. }
  249. version = vmbus_versions[i];
  250. if (version > max_version)
  251. continue;
  252. ret = vmbus_negotiate_version(msginfo, version);
  253. if (ret == -ETIMEDOUT)
  254. goto cleanup;
  255. if (vmbus_connection.conn_state == CONNECTED)
  256. break;
  257. }
  258. if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) {
  259. pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n",
  260. version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF);
  261. ret = -EINVAL;
  262. goto cleanup;
  263. }
  264. vmbus_proto_version = version;
  265. pr_info("Vmbus version:%d.%d\n",
  266. version >> 16, version & 0xFFFF);
  267. vmbus_connection.channels = kzalloc_objs(struct vmbus_channel *,
  268. MAX_CHANNEL_RELIDS);
  269. if (vmbus_connection.channels == NULL) {
  270. ret = -ENOMEM;
  271. goto cleanup;
  272. }
  273. kfree(msginfo);
  274. return 0;
  275. cleanup:
  276. pr_err("Unable to connect to host\n");
  277. vmbus_connection.conn_state = DISCONNECTED;
  278. vmbus_disconnect();
  279. kfree(msginfo);
  280. return ret;
  281. }
  282. void vmbus_disconnect(void)
  283. {
  284. /*
  285. * First send the unload request to the host.
  286. */
  287. vmbus_initiate_unload(false);
  288. if (vmbus_connection.handle_sub_chan_wq)
  289. destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
  290. if (vmbus_connection.handle_primary_chan_wq)
  291. destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
  292. if (vmbus_connection.rescind_work_queue)
  293. destroy_workqueue(vmbus_connection.rescind_work_queue);
  294. if (vmbus_connection.work_queue)
  295. destroy_workqueue(vmbus_connection.work_queue);
  296. if (vmbus_connection.int_page) {
  297. free_page((unsigned long)vmbus_connection.int_page);
  298. vmbus_connection.int_page = NULL;
  299. }
  300. if (vmbus_connection.monitor_pages[0]) {
  301. if (!set_memory_encrypted(
  302. (unsigned long)vmbus_connection.monitor_pages[0], 1))
  303. free_page((unsigned long)
  304. vmbus_connection.monitor_pages[0]);
  305. vmbus_connection.monitor_pages[0] = NULL;
  306. }
  307. if (vmbus_connection.monitor_pages[1]) {
  308. if (!set_memory_encrypted(
  309. (unsigned long)vmbus_connection.monitor_pages[1], 1))
  310. free_page((unsigned long)
  311. vmbus_connection.monitor_pages[1]);
  312. vmbus_connection.monitor_pages[1] = NULL;
  313. }
  314. }
  315. /*
  316. * relid2channel - Get the channel object given its
  317. * child relative id (ie channel id)
  318. */
  319. struct vmbus_channel *relid2channel(u32 relid)
  320. {
  321. if (vmbus_connection.channels == NULL) {
  322. pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid);
  323. return NULL;
  324. }
  325. if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
  326. return NULL;
  327. return READ_ONCE(vmbus_connection.channels[relid]);
  328. }
  329. /*
  330. * vmbus_on_event - Process a channel event notification
  331. *
  332. * For batched channels (default) optimize host to guest signaling
  333. * by ensuring:
  334. * 1. While reading the channel, we disable interrupts from host.
  335. * 2. Ensure that we process all posted messages from the host
  336. * before returning from this callback.
  337. * 3. Once we return, enable signaling from the host. Once this
  338. * state is set we check to see if additional packets are
  339. * available to read. In this case we repeat the process.
  340. * If this tasklet has been running for a long time
  341. * then reschedule ourselves.
  342. */
  343. void vmbus_on_event(unsigned long data)
  344. {
  345. struct vmbus_channel *channel = (void *) data;
  346. void (*callback_fn)(void *context);
  347. trace_vmbus_on_event(channel);
  348. hv_debug_delay_test(channel, INTERRUPT_DELAY);
  349. /* A channel once created is persistent even when
  350. * there is no driver handling the device. An
  351. * unloading driver sets the onchannel_callback to NULL.
  352. */
  353. callback_fn = READ_ONCE(channel->onchannel_callback);
  354. if (unlikely(!callback_fn))
  355. return;
  356. (*callback_fn)(channel->channel_callback_context);
  357. if (channel->callback_mode != HV_CALL_BATCHED)
  358. return;
  359. if (likely(hv_end_read(&channel->inbound) == 0))
  360. return;
  361. hv_begin_read(&channel->inbound);
  362. tasklet_schedule(&channel->callback_event);
  363. }
  364. /*
  365. * vmbus_post_msg - Send a msg on the vmbus's message connection
  366. */
  367. int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
  368. {
  369. struct vmbus_channel_message_header *hdr;
  370. union hv_connection_id conn_id;
  371. int ret = 0;
  372. int retries = 0;
  373. u32 usec = 1;
  374. conn_id.asu32 = 0;
  375. conn_id.u.id = vmbus_connection.msg_conn_id;
  376. /*
  377. * hv_post_message() can have transient failures because of
  378. * insufficient resources. Retry the operation a couple of
  379. * times before giving up.
  380. */
  381. while (retries < 100) {
  382. ret = hv_post_message(conn_id, 1, buffer, buflen);
  383. switch (ret) {
  384. case HV_STATUS_INVALID_CONNECTION_ID:
  385. /*
  386. * See vmbus_negotiate_version(): VMBus protocol 5.0
  387. * and higher require that we must use
  388. * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
  389. * Contact message, but on old hosts that only
  390. * support VMBus protocol 4.0 or lower, here we get
  391. * HV_STATUS_INVALID_CONNECTION_ID and we should
  392. * return an error immediately without retrying.
  393. */
  394. hdr = buffer;
  395. if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT)
  396. return -EINVAL;
  397. /*
  398. * We could get this if we send messages too
  399. * frequently.
  400. */
  401. ret = -EAGAIN;
  402. break;
  403. case HV_STATUS_INSUFFICIENT_MEMORY:
  404. case HV_STATUS_INSUFFICIENT_BUFFERS:
  405. ret = -ENOBUFS;
  406. break;
  407. case HV_STATUS_SUCCESS:
  408. return ret;
  409. default:
  410. pr_err("hv_post_msg() failed; error code:%d\n", ret);
  411. return -EINVAL;
  412. }
  413. retries++;
  414. if (can_sleep && usec > 1000)
  415. msleep(usec / 1000);
  416. else if (usec < MAX_UDELAY_MS * 1000)
  417. udelay(usec);
  418. else
  419. mdelay(usec / 1000);
  420. if (retries < 22)
  421. usec *= 2;
  422. }
  423. return ret;
  424. }
  425. /*
  426. * vmbus_set_event - Send an event notification to the parent
  427. */
  428. void vmbus_set_event(struct vmbus_channel *channel)
  429. {
  430. u32 child_relid = channel->offermsg.child_relid;
  431. if (!channel->is_dedicated_interrupt)
  432. vmbus_send_interrupt(child_relid);
  433. ++channel->sig_events;
  434. if (ms_hyperv.paravisor_present) {
  435. if (hv_isolation_type_snp())
  436. hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
  437. NULL, sizeof(channel->sig_event));
  438. else if (hv_isolation_type_tdx())
  439. hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT,
  440. channel->sig_event, 0);
  441. else
  442. WARN_ON_ONCE(1);
  443. } else {
  444. u64 control = HVCALL_SIGNAL_EVENT;
  445. control |= hv_nested ? HV_HYPERCALL_NESTED : 0;
  446. hv_do_fast_hypercall8(control, channel->sig_event);
  447. }
  448. }
  449. EXPORT_SYMBOL_GPL(vmbus_set_event);