channel_mgmt.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <haiyangz@microsoft.com>
  7. * Hank Janssen <hjanssen@microsoft.com>
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/kernel.h>
  11. #include <linux/interrupt.h>
  12. #include <linux/sched.h>
  13. #include <linux/wait.h>
  14. #include <linux/mm.h>
  15. #include <linux/slab.h>
  16. #include <linux/list.h>
  17. #include <linux/module.h>
  18. #include <linux/completion.h>
  19. #include <linux/delay.h>
  20. #include <linux/cpu.h>
  21. #include <linux/hyperv.h>
  22. #include <linux/export.h>
  23. #include <asm/mshyperv.h>
  24. #include <linux/sched/isolation.h>
  25. #include "hyperv_vmbus.h"
  26. static void init_vp_index(struct vmbus_channel *channel);
  27. const struct vmbus_device vmbus_devs[] = {
  28. /* IDE */
  29. { .dev_type = HV_IDE,
  30. HV_IDE_GUID,
  31. .perf_device = true,
  32. .allowed_in_isolated = false,
  33. },
  34. /* SCSI */
  35. { .dev_type = HV_SCSI,
  36. HV_SCSI_GUID,
  37. .perf_device = true,
  38. .allowed_in_isolated = true,
  39. },
  40. /* Fibre Channel */
  41. { .dev_type = HV_FC,
  42. HV_SYNTHFC_GUID,
  43. .perf_device = true,
  44. .allowed_in_isolated = false,
  45. },
  46. /* Synthetic NIC */
  47. { .dev_type = HV_NIC,
  48. HV_NIC_GUID,
  49. .perf_device = true,
  50. .allowed_in_isolated = true,
  51. },
  52. /* Network Direct */
  53. { .dev_type = HV_ND,
  54. HV_ND_GUID,
  55. .perf_device = true,
  56. .allowed_in_isolated = false,
  57. },
  58. /* PCIE */
  59. { .dev_type = HV_PCIE,
  60. HV_PCIE_GUID,
  61. .perf_device = false,
  62. .allowed_in_isolated = true,
  63. },
  64. /* Synthetic Frame Buffer */
  65. { .dev_type = HV_FB,
  66. HV_SYNTHVID_GUID,
  67. .perf_device = false,
  68. .allowed_in_isolated = false,
  69. },
  70. /* Synthetic Keyboard */
  71. { .dev_type = HV_KBD,
  72. HV_KBD_GUID,
  73. .perf_device = false,
  74. .allowed_in_isolated = false,
  75. },
  76. /* Synthetic MOUSE */
  77. { .dev_type = HV_MOUSE,
  78. HV_MOUSE_GUID,
  79. .perf_device = false,
  80. .allowed_in_isolated = false,
  81. },
  82. /* KVP */
  83. { .dev_type = HV_KVP,
  84. HV_KVP_GUID,
  85. .perf_device = false,
  86. .allowed_in_isolated = false,
  87. },
  88. /* Time Synch */
  89. { .dev_type = HV_TS,
  90. HV_TS_GUID,
  91. .perf_device = false,
  92. .allowed_in_isolated = true,
  93. },
  94. /* Heartbeat */
  95. { .dev_type = HV_HB,
  96. HV_HEART_BEAT_GUID,
  97. .perf_device = false,
  98. .allowed_in_isolated = true,
  99. },
  100. /* Shutdown */
  101. { .dev_type = HV_SHUTDOWN,
  102. HV_SHUTDOWN_GUID,
  103. .perf_device = false,
  104. .allowed_in_isolated = true,
  105. },
  106. /* File copy */
  107. /* fcopy always uses 16KB ring buffer size and is working well for last many years */
  108. { .pref_ring_size = 0x4000,
  109. .dev_type = HV_FCOPY,
  110. HV_FCOPY_GUID,
  111. .perf_device = false,
  112. .allowed_in_isolated = false,
  113. },
  114. /* Backup */
  115. { .dev_type = HV_BACKUP,
  116. HV_VSS_GUID,
  117. .perf_device = false,
  118. .allowed_in_isolated = false,
  119. },
  120. /* Dynamic Memory */
  121. { .dev_type = HV_DM,
  122. HV_DM_GUID,
  123. .perf_device = false,
  124. .allowed_in_isolated = false,
  125. },
  126. /*
  127. * Unknown GUID
  128. * 64 KB ring buffer + 4 KB header should be sufficient size for any Hyper-V device apart
  129. * from HV_NIC and HV_SCSI. This case avoid the fallback for unknown devices to allocate
  130. * much bigger (2 MB) of ring size.
  131. */
  132. { .pref_ring_size = 0x11000,
  133. .dev_type = HV_UNKNOWN,
  134. .perf_device = false,
  135. .allowed_in_isolated = false,
  136. },
  137. };
  138. EXPORT_SYMBOL_GPL(vmbus_devs);
  139. static const struct {
  140. guid_t guid;
  141. } vmbus_unsupported_devs[] = {
  142. { HV_AVMA1_GUID },
  143. { HV_AVMA2_GUID },
  144. { HV_RDV_GUID },
  145. { HV_IMC_GUID },
  146. };
  147. /*
  148. * The rescinded channel may be blocked waiting for a response from the host;
  149. * take care of that.
  150. */
  151. static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
  152. {
  153. struct vmbus_channel_msginfo *msginfo;
  154. unsigned long flags;
  155. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  156. channel->rescind = true;
  157. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  158. msglistentry) {
  159. if (msginfo->waiting_channel == channel) {
  160. complete(&msginfo->waitevent);
  161. break;
  162. }
  163. }
  164. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  165. }
  166. static bool is_unsupported_vmbus_devs(const guid_t *guid)
  167. {
  168. int i;
  169. for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
  170. if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
  171. return true;
  172. return false;
  173. }
  174. static u16 hv_get_dev_type(const struct vmbus_channel *channel)
  175. {
  176. const guid_t *guid = &channel->offermsg.offer.if_type;
  177. u16 i;
  178. if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
  179. return HV_UNKNOWN;
  180. for (i = HV_IDE; i < HV_UNKNOWN; i++) {
  181. if (guid_equal(guid, &vmbus_devs[i].guid))
  182. return i;
  183. }
  184. pr_info("Unknown GUID: %pUl\n", guid);
  185. return i;
  186. }
  187. /**
  188. * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
  189. * @icmsghdrp: Pointer to msg header structure
  190. * @buf: Raw buffer channel data
  191. * @buflen: Length of the raw buffer channel data.
  192. * @fw_version: The framework versions we can support.
  193. * @fw_vercnt: The size of @fw_version.
  194. * @srv_version: The service versions we can support.
  195. * @srv_vercnt: The size of @srv_version.
  196. * @nego_fw_version: The selected framework version.
  197. * @nego_srv_version: The selected service version.
  198. *
  199. * Note: Versions are given in decreasing order.
  200. *
  201. * Set up and fill in default negotiate response message.
  202. * Mainly used by Hyper-V drivers.
  203. */
  204. bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
  205. u32 buflen, const int *fw_version, int fw_vercnt,
  206. const int *srv_version, int srv_vercnt,
  207. int *nego_fw_version, int *nego_srv_version)
  208. {
  209. int icframe_major, icframe_minor;
  210. int icmsg_major, icmsg_minor;
  211. int fw_major, fw_minor;
  212. int srv_major, srv_minor;
  213. int i, j;
  214. bool found_match = false;
  215. struct icmsg_negotiate *negop;
  216. /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
  217. if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
  218. pr_err_ratelimited("Invalid icmsg negotiate\n");
  219. return false;
  220. }
  221. icmsghdrp->icmsgsize = 0x10;
  222. negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];
  223. icframe_major = negop->icframe_vercnt;
  224. icframe_minor = 0;
  225. icmsg_major = negop->icmsg_vercnt;
  226. icmsg_minor = 0;
  227. /* Validate negop packet */
  228. if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
  229. icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
  230. ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
  231. pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
  232. icframe_major, icmsg_major);
  233. goto fw_error;
  234. }
  235. /*
  236. * Select the framework version number we will
  237. * support.
  238. */
  239. for (i = 0; i < fw_vercnt; i++) {
  240. fw_major = (fw_version[i] >> 16);
  241. fw_minor = (fw_version[i] & 0xFFFF);
  242. for (j = 0; j < negop->icframe_vercnt; j++) {
  243. if ((negop->icversion_data[j].major == fw_major) &&
  244. (negop->icversion_data[j].minor == fw_minor)) {
  245. icframe_major = negop->icversion_data[j].major;
  246. icframe_minor = negop->icversion_data[j].minor;
  247. found_match = true;
  248. break;
  249. }
  250. }
  251. if (found_match)
  252. break;
  253. }
  254. if (!found_match)
  255. goto fw_error;
  256. found_match = false;
  257. for (i = 0; i < srv_vercnt; i++) {
  258. srv_major = (srv_version[i] >> 16);
  259. srv_minor = (srv_version[i] & 0xFFFF);
  260. for (j = negop->icframe_vercnt;
  261. (j < negop->icframe_vercnt + negop->icmsg_vercnt);
  262. j++) {
  263. if ((negop->icversion_data[j].major == srv_major) &&
  264. (negop->icversion_data[j].minor == srv_minor)) {
  265. icmsg_major = negop->icversion_data[j].major;
  266. icmsg_minor = negop->icversion_data[j].minor;
  267. found_match = true;
  268. break;
  269. }
  270. }
  271. if (found_match)
  272. break;
  273. }
  274. /*
  275. * Respond with the framework and service
  276. * version numbers we can support.
  277. */
  278. fw_error:
  279. if (!found_match) {
  280. negop->icframe_vercnt = 0;
  281. negop->icmsg_vercnt = 0;
  282. } else {
  283. negop->icframe_vercnt = 1;
  284. negop->icmsg_vercnt = 1;
  285. }
  286. if (nego_fw_version)
  287. *nego_fw_version = (icframe_major << 16) | icframe_minor;
  288. if (nego_srv_version)
  289. *nego_srv_version = (icmsg_major << 16) | icmsg_minor;
  290. negop->icversion_data[0].major = icframe_major;
  291. negop->icversion_data[0].minor = icframe_minor;
  292. negop->icversion_data[1].major = icmsg_major;
  293. negop->icversion_data[1].minor = icmsg_minor;
  294. return found_match;
  295. }
  296. EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
  297. /*
  298. * alloc_channel - Allocate and initialize a vmbus channel object
  299. */
  300. static struct vmbus_channel *alloc_channel(void)
  301. {
  302. struct vmbus_channel *channel;
  303. channel = kzalloc_obj(*channel, GFP_ATOMIC);
  304. if (!channel)
  305. return NULL;
  306. spin_lock_init(&channel->sched_lock);
  307. init_completion(&channel->rescind_event);
  308. INIT_LIST_HEAD(&channel->sc_list);
  309. tasklet_init(&channel->callback_event,
  310. vmbus_on_event, (unsigned long)channel);
  311. hv_ringbuffer_pre_init(channel);
  312. return channel;
  313. }
  314. /*
  315. * free_channel - Release the resources used by the vmbus channel object
  316. */
  317. static void free_channel(struct vmbus_channel *channel)
  318. {
  319. tasklet_kill(&channel->callback_event);
  320. vmbus_remove_channel_attr_group(channel);
  321. kobject_put(&channel->kobj);
  322. }
  323. void vmbus_channel_map_relid(struct vmbus_channel *channel)
  324. {
  325. if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
  326. return;
  327. /*
  328. * The mapping of the channel's relid is visible from the CPUs that
  329. * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
  330. * execute:
  331. *
  332. * (a) In the "normal (i.e., not resuming from hibernation)" path,
  333. * the full barrier in virt_store_mb() guarantees that the store
  334. * is propagated to all CPUs before the add_channel_work work
  335. * is queued. In turn, add_channel_work is queued before the
  336. * channel's ring buffer is allocated/initialized and the
  337. * OPENCHANNEL message for the channel is sent in vmbus_open().
  338. * Hyper-V won't start sending the interrupts for the channel
  339. * before the OPENCHANNEL message is acked. The memory barrier
  340. * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
  341. * that vmbus_chan_sched() must find the channel's relid in
  342. * recv_int_page before retrieving the channel pointer from the
  343. * array of channels.
  344. *
  345. * (b) In the "resuming from hibernation" path, the virt_store_mb()
  346. * guarantees that the store is propagated to all CPUs before
  347. * the VMBus connection is marked as ready for the resume event
  348. * (cf. check_ready_for_resume_event()). The interrupt handler
  349. * of the VMBus driver and vmbus_chan_sched() can not run before
  350. * vmbus_bus_resume() has completed execution (cf. resume_noirq).
  351. */
  352. virt_store_mb(
  353. vmbus_connection.channels[channel->offermsg.child_relid],
  354. channel);
  355. }
  356. void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
  357. {
  358. if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
  359. return;
  360. WRITE_ONCE(
  361. vmbus_connection.channels[channel->offermsg.child_relid],
  362. NULL);
  363. }
  364. static void vmbus_release_relid(u32 relid)
  365. {
  366. struct vmbus_channel_relid_released msg;
  367. int ret;
  368. memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
  369. msg.child_relid = relid;
  370. msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
  371. ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
  372. true);
  373. trace_vmbus_release_relid(&msg, ret);
  374. }
  375. void hv_process_channel_removal(struct vmbus_channel *channel)
  376. {
  377. lockdep_assert_held(&vmbus_connection.channel_mutex);
  378. BUG_ON(!channel->rescind);
  379. /*
  380. * hv_process_channel_removal() could find INVALID_RELID only for
  381. * hv_sock channels. See the inline comments in vmbus_onoffer().
  382. */
  383. WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
  384. !is_hvsock_channel(channel));
  385. /*
  386. * Upon suspend, an in-use hv_sock channel is removed from the array of
  387. * channels and the relid is invalidated. After hibernation, when the
  388. * user-space application destroys the channel, it's unnecessary and
  389. * unsafe to remove the channel from the array of channels. See also
  390. * the inline comments before the call of vmbus_release_relid() below.
  391. */
  392. if (channel->offermsg.child_relid != INVALID_RELID)
  393. vmbus_channel_unmap_relid(channel);
  394. if (channel->primary_channel == NULL)
  395. list_del(&channel->listentry);
  396. else
  397. list_del(&channel->sc_list);
  398. /*
  399. * If this is a "perf" channel, updates the hv_numa_map[] masks so that
  400. * init_vp_index() can (re-)use the CPU.
  401. */
  402. if (hv_is_perf_channel(channel))
  403. hv_clear_allocated_cpu(channel->target_cpu);
  404. /*
  405. * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
  406. * the relid is invalidated; after hibernation, when the user-space app
  407. * destroys the channel, the relid is INVALID_RELID, and in this case
  408. * it's unnecessary and unsafe to release the old relid, since the same
  409. * relid can refer to a completely different channel now.
  410. */
  411. if (channel->offermsg.child_relid != INVALID_RELID)
  412. vmbus_release_relid(channel->offermsg.child_relid);
  413. free_channel(channel);
  414. }
  415. void vmbus_free_channels(void)
  416. {
  417. struct vmbus_channel *channel, *tmp;
  418. list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
  419. listentry) {
  420. /* hv_process_channel_removal() needs this */
  421. channel->rescind = true;
  422. vmbus_device_unregister(channel->device_obj);
  423. }
  424. }
  425. /* Note: the function can run concurrently for primary/sub channels. */
  426. static void vmbus_add_channel_work(struct work_struct *work)
  427. {
  428. struct vmbus_channel *newchannel =
  429. container_of(work, struct vmbus_channel, add_channel_work);
  430. struct vmbus_channel *primary_channel = newchannel->primary_channel;
  431. int ret;
  432. /*
  433. * This state is used to indicate a successful open
  434. * so that when we do close the channel normally, we
  435. * can cleanup properly.
  436. */
  437. newchannel->state = CHANNEL_OPEN_STATE;
  438. if (primary_channel != NULL) {
  439. /* newchannel is a sub-channel. */
  440. struct hv_device *dev = primary_channel->device_obj;
  441. if (vmbus_add_channel_kobj(dev, newchannel))
  442. goto err_deq_chan;
  443. if (primary_channel->sc_creation_callback != NULL)
  444. primary_channel->sc_creation_callback(newchannel);
  445. newchannel->probe_done = true;
  446. return;
  447. }
  448. /*
  449. * Start the process of binding the primary channel to the driver
  450. */
  451. newchannel->device_obj = vmbus_device_create(
  452. &newchannel->offermsg.offer.if_type,
  453. &newchannel->offermsg.offer.if_instance,
  454. newchannel);
  455. if (!newchannel->device_obj)
  456. goto err_deq_chan;
  457. newchannel->device_obj->device_id = newchannel->device_id;
  458. /*
  459. * Add the new device to the bus. This will kick off device-driver
  460. * binding which eventually invokes the device driver's AddDevice()
  461. * method.
  462. *
  463. * If vmbus_device_register() fails, the 'device_obj' is freed in
  464. * vmbus_device_release() as called by device_unregister() in the
  465. * error path of vmbus_device_register(). In the outside error
  466. * path, there's no need to free it.
  467. */
  468. ret = vmbus_device_register(newchannel->device_obj);
  469. if (ret != 0) {
  470. pr_err("unable to add child device object (relid %d)\n",
  471. newchannel->offermsg.child_relid);
  472. goto err_deq_chan;
  473. }
  474. newchannel->probe_done = true;
  475. return;
  476. err_deq_chan:
  477. mutex_lock(&vmbus_connection.channel_mutex);
  478. /*
  479. * We need to set the flag, otherwise
  480. * vmbus_onoffer_rescind() can be blocked.
  481. */
  482. newchannel->probe_done = true;
  483. if (primary_channel == NULL)
  484. list_del(&newchannel->listentry);
  485. else
  486. list_del(&newchannel->sc_list);
  487. /* vmbus_process_offer() has mapped the channel. */
  488. vmbus_channel_unmap_relid(newchannel);
  489. mutex_unlock(&vmbus_connection.channel_mutex);
  490. vmbus_release_relid(newchannel->offermsg.child_relid);
  491. free_channel(newchannel);
  492. }
  493. /*
  494. * vmbus_process_offer - Process the offer by creating a channel/device
  495. * associated with this offer
  496. */
  497. static void vmbus_process_offer(struct vmbus_channel *newchannel)
  498. {
  499. struct vmbus_channel *channel;
  500. struct workqueue_struct *wq;
  501. bool fnew = true;
  502. /*
  503. * Synchronize vmbus_process_offer() and CPU hotplugging:
  504. *
  505. * CPU1 CPU2
  506. *
  507. * [vmbus_process_offer()] [Hot removal of the CPU]
  508. *
  509. * CPU_READ_LOCK CPUS_WRITE_LOCK
  510. * LOAD cpu_online_mask SEARCH chn_list
  511. * STORE target_cpu LOAD target_cpu
  512. * INSERT chn_list STORE cpu_online_mask
  513. * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
  514. *
  515. * Forbids: CPU1's LOAD from *not* seing CPU2's STORE &&
  516. * CPU2's SEARCH from *not* seeing CPU1's INSERT
  517. *
  518. * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
  519. * CPU2's LOAD from *not* seing CPU1's STORE
  520. */
  521. cpus_read_lock();
  522. /*
  523. * Serializes the modifications of the chn_list list as well as
  524. * the accesses to next_numa_node_id in init_vp_index().
  525. */
  526. mutex_lock(&vmbus_connection.channel_mutex);
  527. list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
  528. if (guid_equal(&channel->offermsg.offer.if_type,
  529. &newchannel->offermsg.offer.if_type) &&
  530. guid_equal(&channel->offermsg.offer.if_instance,
  531. &newchannel->offermsg.offer.if_instance)) {
  532. fnew = false;
  533. newchannel->primary_channel = channel;
  534. break;
  535. }
  536. }
  537. init_vp_index(newchannel);
  538. /* Remember the channels that should be cleaned up upon suspend. */
  539. if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
  540. atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
  541. /*
  542. * Now that we have acquired the channel_mutex,
  543. * we can release the potentially racing rescind thread.
  544. */
  545. atomic_dec(&vmbus_connection.offer_in_progress);
  546. if (fnew) {
  547. list_add_tail(&newchannel->listentry,
  548. &vmbus_connection.chn_list);
  549. } else {
  550. /*
  551. * Check to see if this is a valid sub-channel.
  552. */
  553. if (newchannel->offermsg.offer.sub_channel_index == 0) {
  554. mutex_unlock(&vmbus_connection.channel_mutex);
  555. cpus_read_unlock();
  556. /*
  557. * Don't call free_channel(), because newchannel->kobj
  558. * is not initialized yet.
  559. */
  560. kfree(newchannel);
  561. WARN_ON_ONCE(1);
  562. return;
  563. }
  564. /*
  565. * Process the sub-channel.
  566. */
  567. list_add_tail(&newchannel->sc_list, &channel->sc_list);
  568. }
  569. vmbus_channel_map_relid(newchannel);
  570. mutex_unlock(&vmbus_connection.channel_mutex);
  571. cpus_read_unlock();
  572. /*
  573. * vmbus_process_offer() mustn't call channel->sc_creation_callback()
  574. * directly for sub-channels, because sc_creation_callback() ->
  575. * vmbus_open() may never get the host's response to the
  576. * OPEN_CHANNEL message (the host may rescind a channel at any time,
  577. * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
  578. * may not wake up the vmbus_open() as it's blocked due to a non-zero
  579. * vmbus_connection.offer_in_progress, and finally we have a deadlock.
  580. *
  581. * The above is also true for primary channels, if the related device
  582. * drivers use sync probing mode by default.
  583. *
  584. * And, usually the handling of primary channels and sub-channels can
  585. * depend on each other, so we should offload them to different
  586. * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
  587. * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
  588. * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
  589. * and waits for all the sub-channels to appear, but the latter
  590. * can't get the rtnl_lock and this blocks the handling of
  591. * sub-channels.
  592. */
  593. INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
  594. wq = fnew ? vmbus_connection.handle_primary_chan_wq :
  595. vmbus_connection.handle_sub_chan_wq;
  596. queue_work(wq, &newchannel->add_channel_work);
  597. }
  598. /*
  599. * Check if CPUs used by other channels of the same device.
  600. * It should only be called by init_vp_index().
  601. */
  602. static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
  603. {
  604. struct vmbus_channel *primary = chn->primary_channel;
  605. struct vmbus_channel *sc;
  606. lockdep_assert_held(&vmbus_connection.channel_mutex);
  607. if (!primary)
  608. return false;
  609. if (primary->target_cpu == cpu)
  610. return true;
  611. list_for_each_entry(sc, &primary->sc_list, sc_list)
  612. if (sc != chn && sc->target_cpu == cpu)
  613. return true;
  614. return false;
  615. }
  616. /*
  617. * We use this state to statically distribute the channel interrupt load.
  618. */
  619. static int next_numa_node_id;
  620. /*
  621. * We can statically distribute the incoming channel interrupt load
  622. * by binding a channel to VCPU.
  623. *
  624. * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
  625. * Performance critical channels will be distributed evenly among all
  626. * the available NUMA nodes. Once the node is assigned, we will assign
  627. * the CPU based on a simple round robin scheme.
  628. */
  629. static void init_vp_index(struct vmbus_channel *channel)
  630. {
  631. bool perf_chn = hv_is_perf_channel(channel);
  632. u32 i, ncpu = num_online_cpus();
  633. cpumask_var_t available_mask;
  634. struct cpumask *allocated_mask;
  635. const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
  636. u32 target_cpu;
  637. int numa_node;
  638. if (!perf_chn ||
  639. !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
  640. cpumask_empty(hk_mask)) {
  641. /*
  642. * If the channel is not a performance critical
  643. * channel, bind it to VMBUS_CONNECT_CPU.
  644. * In case alloc_cpumask_var() fails, bind it to
  645. * VMBUS_CONNECT_CPU.
  646. * If all the cpus are isolated, bind it to
  647. * VMBUS_CONNECT_CPU.
  648. */
  649. channel->target_cpu = VMBUS_CONNECT_CPU;
  650. if (perf_chn)
  651. hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
  652. return;
  653. }
  654. for (i = 1; i <= ncpu + 1; i++) {
  655. while (true) {
  656. numa_node = next_numa_node_id++;
  657. if (numa_node == nr_node_ids) {
  658. next_numa_node_id = 0;
  659. continue;
  660. }
  661. if (cpumask_empty(cpumask_of_node(numa_node)))
  662. continue;
  663. break;
  664. }
  665. allocated_mask = &hv_context.hv_numa_map[numa_node];
  666. retry:
  667. cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
  668. cpumask_and(available_mask, available_mask, hk_mask);
  669. if (cpumask_empty(available_mask)) {
  670. /*
  671. * We have cycled through all the CPUs in the node;
  672. * reset the allocated map.
  673. */
  674. cpumask_clear(allocated_mask);
  675. goto retry;
  676. }
  677. target_cpu = cpumask_first(available_mask);
  678. cpumask_set_cpu(target_cpu, allocated_mask);
  679. if (channel->offermsg.offer.sub_channel_index >= ncpu ||
  680. i > ncpu || !hv_cpuself_used(target_cpu, channel))
  681. break;
  682. }
  683. channel->target_cpu = target_cpu;
  684. free_cpumask_var(available_mask);
  685. }
  686. #define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */
  687. #define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */
  688. #define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
  689. #define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */
  690. #define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
  691. static void vmbus_wait_for_unload(void)
  692. {
  693. int cpu;
  694. void *page_addr;
  695. struct hv_message *msg;
  696. struct vmbus_channel_message_header *hdr;
  697. u32 message_type, i;
  698. /*
  699. * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
  700. * used for initial contact or to CPU0 depending on host version. When
  701. * we're crashing on a different CPU let's hope that IRQ handler on
  702. * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
  703. * functional and vmbus_unload_response() will complete
  704. * vmbus_connection.unload_event. If not, the last thing we can do is
  705. * read message pages for all CPUs directly.
  706. *
  707. * Wait up to 100 seconds since an Azure host must writeback any dirty
  708. * data in its disk cache before the VMbus UNLOAD request will
  709. * complete. This flushing has been empirically observed to take up
  710. * to 50 seconds in cases with a lot of dirty data, so allow additional
  711. * leeway and for inaccuracies in mdelay(). But eventually time out so
  712. * that the panic path can't get hung forever in case the response
  713. * message isn't seen.
  714. */
  715. for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
  716. if (completion_done(&vmbus_connection.unload_event))
  717. goto completed;
  718. for_each_present_cpu(cpu) {
  719. struct hv_per_cpu_context *hv_cpu
  720. = per_cpu_ptr(hv_context.cpu_context, cpu);
  721. /*
  722. * In a CoCo VM the hyp_synic_message_page is not allocated
  723. * in hv_synic_alloc(). Instead it is set/cleared in
  724. * hv_hyp_synic_enable_regs() and hv_hyp_synic_disable_regs()
  725. * such that it is set only when the CPU is online. If
  726. * not all present CPUs are online, the message page
  727. * might be NULL, so skip such CPUs.
  728. */
  729. page_addr = hv_cpu->hyp_synic_message_page;
  730. if (!page_addr)
  731. continue;
  732. msg = (struct hv_message *)page_addr
  733. + VMBUS_MESSAGE_SINT;
  734. message_type = READ_ONCE(msg->header.message_type);
  735. if (message_type == HVMSG_NONE)
  736. continue;
  737. hdr = (struct vmbus_channel_message_header *)
  738. msg->u.payload;
  739. if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
  740. complete(&vmbus_connection.unload_event);
  741. vmbus_signal_eom(msg, message_type);
  742. }
  743. /*
  744. * Give a notice periodically so someone watching the
  745. * serial output won't think it is completely hung.
  746. */
  747. if (!(i % UNLOAD_MSG_LOOPS))
  748. pr_notice("Waiting for VMBus UNLOAD to complete\n");
  749. mdelay(UNLOAD_DELAY_UNIT_MS);
  750. }
  751. pr_err("Continuing even though VMBus UNLOAD did not complete\n");
  752. completed:
  753. /*
  754. * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
  755. * maybe-pending messages on all CPUs to be able to receive new
  756. * messages after we reconnect.
  757. */
  758. for_each_present_cpu(cpu) {
  759. struct hv_per_cpu_context *hv_cpu
  760. = per_cpu_ptr(hv_context.cpu_context, cpu);
  761. page_addr = hv_cpu->hyp_synic_message_page;
  762. if (!page_addr)
  763. continue;
  764. msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
  765. msg->header.message_type = HVMSG_NONE;
  766. }
  767. }
  768. /*
  769. * vmbus_unload_response - Handler for the unload response.
  770. */
  771. static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
  772. {
  773. /*
  774. * This is a global event; just wakeup the waiting thread.
  775. * Once we successfully unload, we can cleanup the monitor state.
  776. *
  777. * NB. A malicious or compromised Hyper-V could send a spurious
  778. * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
  779. * of the complete() below. Make sure that unload_event has been
  780. * initialized by the time this complete() is executed.
  781. */
  782. complete(&vmbus_connection.unload_event);
  783. }
  784. void vmbus_initiate_unload(bool crash)
  785. {
  786. struct vmbus_channel_message_header hdr;
  787. if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
  788. return;
  789. /* Pre-Win2012R2 hosts don't support reconnect */
  790. if (vmbus_proto_version < VERSION_WIN8_1)
  791. return;
  792. reinit_completion(&vmbus_connection.unload_event);
  793. memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
  794. hdr.msgtype = CHANNELMSG_UNLOAD;
  795. vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
  796. !crash);
  797. /*
  798. * vmbus_initiate_unload() is also called on crash and the crash can be
  799. * happening in an interrupt context, where scheduling is impossible.
  800. */
  801. if (!crash)
  802. wait_for_completion(&vmbus_connection.unload_event);
  803. else
  804. vmbus_wait_for_unload();
  805. }
  806. static void vmbus_setup_channel_state(struct vmbus_channel *channel,
  807. struct vmbus_channel_offer_channel *offer)
  808. {
  809. /*
  810. * Setup state for signalling the host.
  811. */
  812. channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
  813. channel->is_dedicated_interrupt =
  814. (offer->is_dedicated_interrupt != 0);
  815. channel->sig_event = offer->connection_id;
  816. memcpy(&channel->offermsg, offer,
  817. sizeof(struct vmbus_channel_offer_channel));
  818. channel->monitor_grp = (u8)offer->monitorid / 32;
  819. channel->monitor_bit = (u8)offer->monitorid % 32;
  820. channel->device_id = hv_get_dev_type(channel);
  821. }
  822. /*
  823. * find_primary_channel_by_offer - Get the channel object given the new offer.
  824. * This is only used in the resume path of hibernation.
  825. */
  826. static struct vmbus_channel *
  827. find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
  828. {
  829. struct vmbus_channel *channel = NULL, *iter;
  830. const guid_t *inst1, *inst2;
  831. /* Ignore sub-channel offers. */
  832. if (offer->offer.sub_channel_index != 0)
  833. return NULL;
  834. mutex_lock(&vmbus_connection.channel_mutex);
  835. list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
  836. inst1 = &iter->offermsg.offer.if_instance;
  837. inst2 = &offer->offer.if_instance;
  838. if (guid_equal(inst1, inst2)) {
  839. channel = iter;
  840. break;
  841. }
  842. }
  843. mutex_unlock(&vmbus_connection.channel_mutex);
  844. return channel;
  845. }
  846. static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
  847. {
  848. const guid_t *guid = &offer->offer.if_type;
  849. u16 i;
  850. if (!hv_is_isolation_supported())
  851. return true;
  852. if (is_hvsock_offer(offer))
  853. return true;
  854. for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
  855. if (guid_equal(guid, &vmbus_devs[i].guid))
  856. return vmbus_devs[i].allowed_in_isolated;
  857. }
  858. return false;
  859. }
  860. /*
  861. * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  862. *
  863. */
  864. static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
  865. {
  866. struct vmbus_channel_offer_channel *offer;
  867. struct vmbus_channel *oldchannel, *newchannel;
  868. size_t offer_sz;
  869. bool co_ring_buffer, co_external_memory;
  870. offer = (struct vmbus_channel_offer_channel *)hdr;
  871. trace_vmbus_onoffer(offer);
  872. if (!vmbus_is_valid_offer(offer)) {
  873. pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
  874. offer->child_relid);
  875. atomic_dec(&vmbus_connection.offer_in_progress);
  876. return;
  877. }
  878. co_ring_buffer = is_co_ring_buffer(offer);
  879. co_external_memory = is_co_external_memory(offer);
  880. if (!co_ring_buffer && co_external_memory) {
  881. pr_err("Invalid offer relid=%d: the ring buffer isn't encrypted\n",
  882. offer->child_relid);
  883. return;
  884. }
  885. if (co_ring_buffer || co_external_memory) {
  886. if (vmbus_proto_version < VERSION_WIN10_V6_0 || !vmbus_is_confidential()) {
  887. pr_err("Invalid offer relid=%d: no support for confidential VMBus\n",
  888. offer->child_relid);
  889. atomic_dec(&vmbus_connection.offer_in_progress);
  890. return;
  891. }
  892. }
  893. oldchannel = find_primary_channel_by_offer(offer);
  894. if (oldchannel != NULL) {
  895. /*
  896. * We're resuming from hibernation: all the sub-channel and
  897. * hv_sock channels we had before the hibernation should have
  898. * been cleaned up, and now we must be seeing a re-offered
  899. * primary channel that we had before the hibernation.
  900. */
  901. /*
  902. * { Initially: channel relid = INVALID_RELID,
  903. * channels[valid_relid] = NULL }
  904. *
  905. * CPU1 CPU2
  906. *
  907. * [vmbus_onoffer()] [vmbus_device_release()]
  908. *
  909. * LOCK channel_mutex LOCK channel_mutex
  910. * STORE channel relid = valid_relid LOAD r1 = channel relid
  911. * MAP_RELID channel if (r1 != INVALID_RELID)
  912. * UNLOCK channel_mutex UNMAP_RELID channel
  913. * UNLOCK channel_mutex
  914. *
  915. * Forbids: r1 == valid_relid &&
  916. * channels[valid_relid] == channel
  917. *
  918. * Note. r1 can be INVALID_RELID only for an hv_sock channel.
  919. * None of the hv_sock channels which were present before the
  920. * suspend are re-offered upon the resume. See the WARN_ON()
  921. * in hv_process_channel_removal().
  922. */
  923. mutex_lock(&vmbus_connection.channel_mutex);
  924. atomic_dec(&vmbus_connection.offer_in_progress);
  925. WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
  926. /* Fix up the relid. */
  927. oldchannel->offermsg.child_relid = offer->child_relid;
  928. offer_sz = sizeof(*offer);
  929. if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
  930. /*
  931. * This is not an error, since the host can also change
  932. * the other field(s) of the offer, e.g. on WS RS5
  933. * (Build 17763), the offer->connection_id of the
  934. * Mellanox VF vmbus device can change when the host
  935. * reoffers the device upon resume.
  936. */
  937. pr_debug("vmbus offer changed: relid=%d\n",
  938. offer->child_relid);
  939. print_hex_dump_debug("Old vmbus offer: ",
  940. DUMP_PREFIX_OFFSET, 16, 4,
  941. &oldchannel->offermsg, offer_sz,
  942. false);
  943. print_hex_dump_debug("New vmbus offer: ",
  944. DUMP_PREFIX_OFFSET, 16, 4,
  945. offer, offer_sz, false);
  946. /* Fix up the old channel. */
  947. vmbus_setup_channel_state(oldchannel, offer);
  948. }
  949. /* Add the channel back to the array of channels. */
  950. vmbus_channel_map_relid(oldchannel);
  951. mutex_unlock(&vmbus_connection.channel_mutex);
  952. return;
  953. }
  954. /* Allocate the channel object and save this offer. */
  955. newchannel = alloc_channel();
  956. if (!newchannel) {
  957. vmbus_release_relid(offer->child_relid);
  958. atomic_dec(&vmbus_connection.offer_in_progress);
  959. pr_err("Unable to allocate channel object\n");
  960. return;
  961. }
  962. newchannel->co_ring_buffer = co_ring_buffer;
  963. newchannel->co_external_memory = co_external_memory;
  964. vmbus_setup_channel_state(newchannel, offer);
  965. vmbus_process_offer(newchannel);
  966. }
  967. static void check_ready_for_suspend_event(void)
  968. {
  969. /*
  970. * If all the sub-channels or hv_sock channels have been cleaned up,
  971. * then it's safe to suspend.
  972. */
  973. if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
  974. complete(&vmbus_connection.ready_for_suspend_event);
  975. }
  976. /*
  977. * vmbus_onoffer_rescind - Rescind offer handler.
  978. *
  979. * We queue a work item to process this offer synchronously
  980. */
  981. static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
  982. {
  983. struct vmbus_channel_rescind_offer *rescind;
  984. struct vmbus_channel *channel;
  985. struct device *dev;
  986. bool clean_up_chan_for_suspend;
  987. rescind = (struct vmbus_channel_rescind_offer *)hdr;
  988. trace_vmbus_onoffer_rescind(rescind);
  989. /*
  990. * The offer msg and the corresponding rescind msg
  991. * from the host are guranteed to be ordered -
  992. * offer comes in first and then the rescind.
  993. * Since we process these events in work elements,
  994. * and with preemption, we may end up processing
  995. * the events out of order. We rely on the synchronization
  996. * provided by offer_in_progress and by channel_mutex for
  997. * ordering these events:
  998. *
  999. * { Initially: offer_in_progress = 1 }
  1000. *
  1001. * CPU1 CPU2
  1002. *
  1003. * [vmbus_onoffer()] [vmbus_onoffer_rescind()]
  1004. *
  1005. * LOCK channel_mutex WAIT_ON offer_in_progress == 0
  1006. * DECREMENT offer_in_progress LOCK channel_mutex
  1007. * STORE channels[] LOAD channels[]
  1008. * UNLOCK channel_mutex UNLOCK channel_mutex
  1009. *
  1010. * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
  1011. */
  1012. while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
  1013. /*
  1014. * We wait here until any channel offer is currently
  1015. * being processed.
  1016. */
  1017. msleep(1);
  1018. }
  1019. mutex_lock(&vmbus_connection.channel_mutex);
  1020. channel = relid2channel(rescind->child_relid);
  1021. if (channel != NULL) {
  1022. /*
  1023. * Guarantee that no other instance of vmbus_onoffer_rescind()
  1024. * has got a reference to the channel object. Synchronize on
  1025. * &vmbus_connection.channel_mutex.
  1026. */
  1027. if (channel->rescind_ref) {
  1028. mutex_unlock(&vmbus_connection.channel_mutex);
  1029. return;
  1030. }
  1031. channel->rescind_ref = true;
  1032. }
  1033. mutex_unlock(&vmbus_connection.channel_mutex);
  1034. if (channel == NULL) {
  1035. /*
  1036. * We failed in processing the offer message;
  1037. * we would have cleaned up the relid in that
  1038. * failure path.
  1039. */
  1040. return;
  1041. }
  1042. clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
  1043. is_sub_channel(channel);
  1044. /*
  1045. * Before setting channel->rescind in vmbus_rescind_cleanup(), we
  1046. * should make sure the channel callback is not running any more.
  1047. */
  1048. vmbus_reset_channel_cb(channel);
  1049. /*
  1050. * Now wait for offer handling to complete.
  1051. */
  1052. vmbus_rescind_cleanup(channel);
  1053. while (READ_ONCE(channel->probe_done) == false) {
  1054. /*
  1055. * We wait here until any channel offer is currently
  1056. * being processed.
  1057. */
  1058. msleep(1);
  1059. }
  1060. /*
  1061. * At this point, the rescind handling can proceed safely.
  1062. */
  1063. if (channel->device_obj) {
  1064. if (channel->chn_rescind_callback) {
  1065. channel->chn_rescind_callback(channel);
  1066. if (clean_up_chan_for_suspend)
  1067. check_ready_for_suspend_event();
  1068. return;
  1069. }
  1070. /*
  1071. * We will have to unregister this device from the
  1072. * driver core.
  1073. */
  1074. dev = get_device(&channel->device_obj->device);
  1075. if (dev) {
  1076. vmbus_device_unregister(channel->device_obj);
  1077. put_device(dev);
  1078. }
  1079. } else if (channel->primary_channel != NULL) {
  1080. /*
  1081. * Sub-channel is being rescinded. Following is the channel
  1082. * close sequence when initiated from the driveri (refer to
  1083. * vmbus_close() for details):
  1084. * 1. Close all sub-channels first
  1085. * 2. Then close the primary channel.
  1086. */
  1087. mutex_lock(&vmbus_connection.channel_mutex);
  1088. if (channel->state == CHANNEL_OPEN_STATE) {
  1089. /*
  1090. * The channel is currently not open;
  1091. * it is safe for us to cleanup the channel.
  1092. */
  1093. hv_process_channel_removal(channel);
  1094. } else {
  1095. complete(&channel->rescind_event);
  1096. }
  1097. mutex_unlock(&vmbus_connection.channel_mutex);
  1098. }
  1099. /* The "channel" may have been freed. Do not access it any longer. */
  1100. if (clean_up_chan_for_suspend)
  1101. check_ready_for_suspend_event();
  1102. }
  1103. void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
  1104. {
  1105. BUG_ON(!is_hvsock_channel(channel));
  1106. /* We always get a rescind msg when a connection is closed. */
  1107. while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
  1108. msleep(1);
  1109. vmbus_device_unregister(channel->device_obj);
  1110. }
  1111. EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
  1112. /*
  1113. * vmbus_onoffers_delivered -
  1114. * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all
  1115. * boot-time offers are delivered. A boot-time offer is for the primary
  1116. * channel for any virtual hardware configured in the VM at the time it boots.
  1117. * Boot-time offers include offers for physical devices assigned to the VM
  1118. * via Hyper-V's Discrete Device Assignment (DDA) functionality that are
  1119. * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs).
  1120. * Boot-time offers do not include offers for VMBus sub-channels. Because
  1121. * devices can be hot-added to the VM after it is booted, additional channel
  1122. * offers that aren't boot-time offers can be received at any time after the
  1123. * all-offers-delivered message.
  1124. *
  1125. * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered
  1126. * to be assigned to the VM at boot-time, and offers for VFs may occur after
  1127. * the all-offers-delivered message. VFs are optional accelerators to the
  1128. * synthetic VMBus NIC and are effectively hot-added only after the VMBus
  1129. * NIC channel is opened (once it knows the guest can support it, via the
  1130. * sriov bit in the netvsc protocol).
  1131. */
  1132. static void vmbus_onoffers_delivered(
  1133. struct vmbus_channel_message_header *hdr)
  1134. {
  1135. complete(&vmbus_connection.all_offers_delivered_event);
  1136. }
  1137. /*
  1138. * vmbus_onopen_result - Open result handler.
  1139. *
  1140. * This is invoked when we received a response to our channel open request.
  1141. * Find the matching request, copy the response and signal the requesting
  1142. * thread.
  1143. */
  1144. static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
  1145. {
  1146. struct vmbus_channel_open_result *result;
  1147. struct vmbus_channel_msginfo *msginfo;
  1148. struct vmbus_channel_message_header *requestheader;
  1149. struct vmbus_channel_open_channel *openmsg;
  1150. unsigned long flags;
  1151. result = (struct vmbus_channel_open_result *)hdr;
  1152. trace_vmbus_onopen_result(result);
  1153. /*
  1154. * Find the open msg, copy the result and signal/unblock the wait event
  1155. */
  1156. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1157. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1158. msglistentry) {
  1159. requestheader =
  1160. (struct vmbus_channel_message_header *)msginfo->msg;
  1161. if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
  1162. openmsg =
  1163. (struct vmbus_channel_open_channel *)msginfo->msg;
  1164. if (openmsg->child_relid == result->child_relid &&
  1165. openmsg->openid == result->openid) {
  1166. memcpy(&msginfo->response.open_result,
  1167. result,
  1168. sizeof(
  1169. struct vmbus_channel_open_result));
  1170. complete(&msginfo->waitevent);
  1171. break;
  1172. }
  1173. }
  1174. }
  1175. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1176. }
  1177. /*
  1178. * vmbus_ongpadl_created - GPADL created handler.
  1179. *
  1180. * This is invoked when we received a response to our gpadl create request.
  1181. * Find the matching request, copy the response and signal the requesting
  1182. * thread.
  1183. */
  1184. static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
  1185. {
  1186. struct vmbus_channel_gpadl_created *gpadlcreated;
  1187. struct vmbus_channel_msginfo *msginfo;
  1188. struct vmbus_channel_message_header *requestheader;
  1189. struct vmbus_channel_gpadl_header *gpadlheader;
  1190. unsigned long flags;
  1191. gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
  1192. trace_vmbus_ongpadl_created(gpadlcreated);
  1193. /*
  1194. * Find the establish msg, copy the result and signal/unblock the wait
  1195. * event
  1196. */
  1197. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1198. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1199. msglistentry) {
  1200. requestheader =
  1201. (struct vmbus_channel_message_header *)msginfo->msg;
  1202. if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
  1203. gpadlheader =
  1204. (struct vmbus_channel_gpadl_header *)requestheader;
  1205. if ((gpadlcreated->child_relid ==
  1206. gpadlheader->child_relid) &&
  1207. (gpadlcreated->gpadl == gpadlheader->gpadl)) {
  1208. memcpy(&msginfo->response.gpadl_created,
  1209. gpadlcreated,
  1210. sizeof(
  1211. struct vmbus_channel_gpadl_created));
  1212. complete(&msginfo->waitevent);
  1213. break;
  1214. }
  1215. }
  1216. }
  1217. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1218. }
  1219. /*
  1220. * vmbus_onmodifychannel_response - Modify Channel response handler.
  1221. *
  1222. * This is invoked when we received a response to our channel modify request.
  1223. * Find the matching request, copy the response and signal the requesting thread.
  1224. */
  1225. static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
  1226. {
  1227. struct vmbus_channel_modifychannel_response *response;
  1228. struct vmbus_channel_msginfo *msginfo;
  1229. unsigned long flags;
  1230. response = (struct vmbus_channel_modifychannel_response *)hdr;
  1231. trace_vmbus_onmodifychannel_response(response);
  1232. /*
  1233. * Find the modify msg, copy the response and signal/unblock the wait event.
  1234. */
  1235. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1236. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
  1237. struct vmbus_channel_message_header *responseheader =
  1238. (struct vmbus_channel_message_header *)msginfo->msg;
  1239. if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
  1240. struct vmbus_channel_modifychannel *modifymsg;
  1241. modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
  1242. if (modifymsg->child_relid == response->child_relid) {
  1243. memcpy(&msginfo->response.modify_response, response,
  1244. sizeof(*response));
  1245. complete(&msginfo->waitevent);
  1246. break;
  1247. }
  1248. }
  1249. }
  1250. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1251. }
  1252. /*
  1253. * vmbus_ongpadl_torndown - GPADL torndown handler.
  1254. *
  1255. * This is invoked when we received a response to our gpadl teardown request.
  1256. * Find the matching request, copy the response and signal the requesting
  1257. * thread.
  1258. */
  1259. static void vmbus_ongpadl_torndown(
  1260. struct vmbus_channel_message_header *hdr)
  1261. {
  1262. struct vmbus_channel_gpadl_torndown *gpadl_torndown;
  1263. struct vmbus_channel_msginfo *msginfo;
  1264. struct vmbus_channel_message_header *requestheader;
  1265. struct vmbus_channel_gpadl_teardown *gpadl_teardown;
  1266. unsigned long flags;
  1267. gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
  1268. trace_vmbus_ongpadl_torndown(gpadl_torndown);
  1269. /*
  1270. * Find the open msg, copy the result and signal/unblock the wait event
  1271. */
  1272. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1273. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1274. msglistentry) {
  1275. requestheader =
  1276. (struct vmbus_channel_message_header *)msginfo->msg;
  1277. if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
  1278. gpadl_teardown =
  1279. (struct vmbus_channel_gpadl_teardown *)requestheader;
  1280. if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
  1281. memcpy(&msginfo->response.gpadl_torndown,
  1282. gpadl_torndown,
  1283. sizeof(
  1284. struct vmbus_channel_gpadl_torndown));
  1285. complete(&msginfo->waitevent);
  1286. break;
  1287. }
  1288. }
  1289. }
  1290. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1291. }
  1292. /*
  1293. * vmbus_onversion_response - Version response handler
  1294. *
  1295. * This is invoked when we received a response to our initiate contact request.
  1296. * Find the matching request, copy the response and signal the requesting
  1297. * thread.
  1298. */
  1299. static void vmbus_onversion_response(
  1300. struct vmbus_channel_message_header *hdr)
  1301. {
  1302. struct vmbus_channel_msginfo *msginfo;
  1303. struct vmbus_channel_message_header *requestheader;
  1304. struct vmbus_channel_version_response *version_response;
  1305. unsigned long flags;
  1306. version_response = (struct vmbus_channel_version_response *)hdr;
  1307. trace_vmbus_onversion_response(version_response);
  1308. spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
  1309. list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
  1310. msglistentry) {
  1311. requestheader =
  1312. (struct vmbus_channel_message_header *)msginfo->msg;
  1313. if (requestheader->msgtype ==
  1314. CHANNELMSG_INITIATE_CONTACT) {
  1315. memcpy(&msginfo->response.version_response,
  1316. version_response,
  1317. sizeof(struct vmbus_channel_version_response));
  1318. complete(&msginfo->waitevent);
  1319. }
  1320. }
  1321. spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
  1322. }
  1323. /* Channel message dispatch table */
  1324. const struct vmbus_channel_message_table_entry
  1325. channel_message_table[CHANNELMSG_COUNT] = {
  1326. { CHANNELMSG_INVALID, 0, NULL, 0},
  1327. { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer,
  1328. sizeof(struct vmbus_channel_offer_channel)},
  1329. { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind,
  1330. sizeof(struct vmbus_channel_rescind_offer) },
  1331. { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0},
  1332. { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0},
  1333. { CHANNELMSG_OPENCHANNEL, 0, NULL, 0},
  1334. { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result,
  1335. sizeof(struct vmbus_channel_open_result)},
  1336. { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0},
  1337. { CHANNELMSG_GPADL_HEADER, 0, NULL, 0},
  1338. { CHANNELMSG_GPADL_BODY, 0, NULL, 0},
  1339. { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created,
  1340. sizeof(struct vmbus_channel_gpadl_created)},
  1341. { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0},
  1342. { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown,
  1343. sizeof(struct vmbus_channel_gpadl_torndown) },
  1344. { CHANNELMSG_RELID_RELEASED, 0, NULL, 0},
  1345. { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0},
  1346. { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response,
  1347. sizeof(struct vmbus_channel_version_response)},
  1348. { CHANNELMSG_UNLOAD, 0, NULL, 0},
  1349. { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0},
  1350. { CHANNELMSG_18, 0, NULL, 0},
  1351. { CHANNELMSG_19, 0, NULL, 0},
  1352. { CHANNELMSG_20, 0, NULL, 0},
  1353. { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
  1354. { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
  1355. { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
  1356. { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response,
  1357. sizeof(struct vmbus_channel_modifychannel_response)},
  1358. };
  1359. /*
  1360. * vmbus_onmessage - Handler for channel protocol messages.
  1361. *
  1362. * This is invoked in the vmbus worker thread context.
  1363. */
  1364. void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
  1365. {
  1366. trace_vmbus_on_message(hdr);
  1367. /*
  1368. * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
  1369. * out of bound and the message_handler pointer can not be NULL.
  1370. */
  1371. channel_message_table[hdr->msgtype].message_handler(hdr);
  1372. }
  1373. /*
  1374. * vmbus_request_offers - Send a request to get all our pending offers
  1375. * and wait for all boot-time offers to arrive.
  1376. */
  1377. int vmbus_request_offers(void)
  1378. {
  1379. struct vmbus_channel_message_header *msg;
  1380. struct vmbus_channel_msginfo *msginfo;
  1381. int ret;
  1382. msginfo = kzalloc(sizeof(*msginfo) +
  1383. sizeof(struct vmbus_channel_message_header),
  1384. GFP_KERNEL);
  1385. if (!msginfo)
  1386. return -ENOMEM;
  1387. msg = (struct vmbus_channel_message_header *)msginfo->msg;
  1388. msg->msgtype = CHANNELMSG_REQUESTOFFERS;
  1389. /*
  1390. * This REQUESTOFFERS message will result in the host sending an all
  1391. * offers delivered message after all the boot-time offers are sent.
  1392. */
  1393. ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
  1394. true);
  1395. trace_vmbus_request_offers(ret);
  1396. if (ret != 0) {
  1397. pr_err("Unable to request offers - %d\n", ret);
  1398. goto cleanup;
  1399. }
  1400. /*
  1401. * Wait for the host to send all boot-time offers.
  1402. * Keeping it as a best-effort mechanism, where a warning is
  1403. * printed if a timeout occurs, and execution is resumed.
  1404. */
  1405. if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event,
  1406. secs_to_jiffies(60))) {
  1407. pr_warn("timed out waiting for all boot-time offers to be delivered.\n");
  1408. }
  1409. /*
  1410. * Flush handling of offer messages (which may initiate work on
  1411. * other work queues).
  1412. */
  1413. flush_workqueue(vmbus_connection.work_queue);
  1414. /*
  1415. * Flush workqueue for processing the incoming offers. Subchannel
  1416. * offers and their processing can happen later, so there is no need to
  1417. * flush that workqueue here.
  1418. */
  1419. flush_workqueue(vmbus_connection.handle_primary_chan_wq);
  1420. cleanup:
  1421. kfree(msginfo);
  1422. return ret;
  1423. }
  1424. void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
  1425. void (*sc_cr_cb)(struct vmbus_channel *new_sc))
  1426. {
  1427. primary_channel->sc_creation_callback = sc_cr_cb;
  1428. }
  1429. EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
  1430. void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
  1431. void (*chn_rescind_cb)(struct vmbus_channel *))
  1432. {
  1433. channel->chn_rescind_callback = chn_rescind_cb;
  1434. }
  1435. EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);