mshv_vtl_main.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2023, Microsoft Corporation.
  4. *
  5. * Author:
  6. * Roman Kisel <romank@linux.microsoft.com>
  7. * Saurabh Sengar <ssengar@linux.microsoft.com>
  8. * Naman Jain <namjain@linux.microsoft.com>
  9. */
  10. #include <linux/kernel.h>
  11. #include <linux/module.h>
  12. #include <linux/miscdevice.h>
  13. #include <linux/anon_inodes.h>
  14. #include <linux/cpuhotplug.h>
  15. #include <linux/count_zeros.h>
  16. #include <linux/entry-virt.h>
  17. #include <linux/eventfd.h>
  18. #include <linux/poll.h>
  19. #include <linux/file.h>
  20. #include <linux/vmalloc.h>
  21. #include <asm/debugreg.h>
  22. #include <asm/mshyperv.h>
  23. #include <trace/events/ipi.h>
  24. #include <uapi/asm/mtrr.h>
  25. #include <uapi/linux/mshv.h>
  26. #include <hyperv/hvhdk.h>
  27. #include "../../kernel/fpu/legacy.h"
  28. #include "mshv.h"
  29. #include "mshv_vtl.h"
  30. #include "hyperv_vmbus.h"
  31. MODULE_AUTHOR("Microsoft");
  32. MODULE_LICENSE("GPL");
  33. MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");
  34. #define MSHV_ENTRY_REASON_LOWER_VTL_CALL 0x1
  35. #define MSHV_ENTRY_REASON_INTERRUPT 0x2
  36. #define MSHV_ENTRY_REASON_INTERCEPT 0x3
  37. #define MSHV_REAL_OFF_SHIFT 16
  38. #define MSHV_PG_OFF_CPU_MASK (BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
  39. #define MSHV_RUN_PAGE_OFFSET 0
  40. #define MSHV_REG_PAGE_OFFSET 1
  41. #define VTL2_VMBUS_SINT_INDEX 7
  42. static struct device *mem_dev;
  43. static struct tasklet_struct msg_dpc;
  44. static wait_queue_head_t fd_wait_queue;
  45. static bool has_message;
  46. static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
  47. static DEFINE_MUTEX(flag_lock);
  48. static bool __read_mostly mshv_has_reg_page;
  49. /* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */
  50. #define MAX_BITMAP_SIZE ((U16_MAX + 1) / 8)
  51. struct mshv_vtl_hvcall_fd {
  52. u8 allow_bitmap[MAX_BITMAP_SIZE];
  53. bool allow_map_initialized;
  54. /*
  55. * Used to protect hvcall setup in IOCTLs
  56. */
  57. struct mutex init_mutex;
  58. struct miscdevice *dev;
  59. };
  60. struct mshv_vtl_poll_file {
  61. struct file *file;
  62. wait_queue_entry_t wait;
  63. wait_queue_head_t *wqh;
  64. poll_table pt;
  65. int cpu;
  66. };
  67. struct mshv_vtl {
  68. struct device *module_dev;
  69. u64 id;
  70. };
  71. struct mshv_vtl_per_cpu {
  72. struct mshv_vtl_run *run;
  73. struct page *reg_page;
  74. };
  75. /* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
  76. union hv_synic_overlay_page_msr {
  77. u64 as_uint64;
  78. struct {
  79. u64 enabled: 1;
  80. u64 reserved: 11;
  81. u64 pfn: 52;
  82. } __packed;
  83. };
  84. static struct mutex mshv_vtl_poll_file_lock;
  85. static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
  86. static union hv_register_vsm_capabilities mshv_vsm_capabilities;
  87. static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
  88. static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
  89. static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);
  90. static const union hv_input_vtl input_vtl_zero;
  91. static const union hv_input_vtl input_vtl_normal = {
  92. .use_target_vtl = 1,
  93. };
  94. static const struct file_operations mshv_vtl_fops;
  95. static long
  96. mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
  97. {
  98. struct mshv_vtl *vtl;
  99. struct file *file;
  100. int fd;
  101. vtl = kzalloc_obj(*vtl);
  102. if (!vtl)
  103. return -ENOMEM;
  104. fd = get_unused_fd_flags(O_CLOEXEC);
  105. if (fd < 0) {
  106. kfree(vtl);
  107. return fd;
  108. }
  109. file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
  110. vtl, O_RDWR);
  111. if (IS_ERR(file)) {
  112. kfree(vtl);
  113. return PTR_ERR(file);
  114. }
  115. vtl->module_dev = module_dev;
  116. fd_install(fd, file);
  117. return fd;
  118. }
  119. static long
  120. mshv_ioctl_check_extension(void __user *user_arg)
  121. {
  122. u32 arg;
  123. if (copy_from_user(&arg, user_arg, sizeof(arg)))
  124. return -EFAULT;
  125. switch (arg) {
  126. case MSHV_CAP_CORE_API_STABLE:
  127. return 0;
  128. case MSHV_CAP_REGISTER_PAGE:
  129. return mshv_has_reg_page;
  130. case MSHV_CAP_VTL_RETURN_ACTION:
  131. return mshv_vsm_capabilities.return_action_available;
  132. case MSHV_CAP_DR6_SHARED:
  133. return mshv_vsm_capabilities.dr6_shared;
  134. }
  135. return -EOPNOTSUPP;
  136. }
  137. static long
  138. mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
  139. {
  140. struct miscdevice *misc = filp->private_data;
  141. switch (ioctl) {
  142. case MSHV_CHECK_EXTENSION:
  143. return mshv_ioctl_check_extension((void __user *)arg);
  144. case MSHV_CREATE_VTL:
  145. return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
  146. }
  147. return -ENOTTY;
  148. }
  149. static const struct file_operations mshv_dev_fops = {
  150. .owner = THIS_MODULE,
  151. .unlocked_ioctl = mshv_dev_ioctl,
  152. .llseek = noop_llseek,
  153. };
  154. static struct miscdevice mshv_dev = {
  155. .minor = MISC_DYNAMIC_MINOR,
  156. .name = "mshv",
  157. .fops = &mshv_dev_fops,
  158. .mode = 0600,
  159. };
  160. static struct mshv_vtl_run *mshv_vtl_this_run(void)
  161. {
  162. return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
  163. }
  164. static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
  165. {
  166. return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
  167. }
  168. static struct page *mshv_vtl_cpu_reg_page(int cpu)
  169. {
  170. return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
  171. }
  172. static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
  173. {
  174. struct hv_register_assoc reg_assoc = {};
  175. union hv_synic_overlay_page_msr overlay = {};
  176. struct page *reg_page;
  177. reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
  178. if (!reg_page) {
  179. WARN(1, "failed to allocate register page\n");
  180. return;
  181. }
  182. overlay.enabled = 1;
  183. overlay.pfn = page_to_hvpfn(reg_page);
  184. reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
  185. reg_assoc.value.reg64 = overlay.as_uint64;
  186. if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
  187. 1, input_vtl_zero, &reg_assoc)) {
  188. WARN(1, "failed to setup register page\n");
  189. __free_page(reg_page);
  190. return;
  191. }
  192. per_cpu->reg_page = reg_page;
  193. mshv_has_reg_page = true;
  194. }
  195. static void mshv_vtl_synic_enable_regs(unsigned int cpu)
  196. {
  197. union hv_synic_sint sint;
  198. sint.as_uint64 = 0;
  199. sint.vector = HYPERVISOR_CALLBACK_VECTOR;
  200. sint.masked = false;
  201. sint.auto_eoi = hv_recommend_using_aeoi();
  202. /* Enable intercepts */
  203. if (!mshv_vsm_capabilities.intercept_page_available)
  204. hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
  205. sint.as_uint64);
  206. /* VTL2 Host VSP SINT is (un)masked when the user mode requests that */
  207. }
  208. static int mshv_vtl_get_vsm_regs(void)
  209. {
  210. struct hv_register_assoc registers[2];
  211. int ret, count = 2;
  212. registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
  213. registers[1].name = HV_REGISTER_VSM_CAPABILITIES;
  214. ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
  215. count, input_vtl_zero, registers);
  216. if (ret)
  217. return ret;
  218. mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
  219. mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;
  220. return ret;
  221. }
  222. static int mshv_vtl_configure_vsm_partition(struct device *dev)
  223. {
  224. union hv_register_vsm_partition_config config;
  225. struct hv_register_assoc reg_assoc;
  226. config.as_uint64 = 0;
  227. config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
  228. config.enable_vtl_protection = 1;
  229. config.zero_memory_on_reset = 1;
  230. config.intercept_vp_startup = 1;
  231. config.intercept_cpuid_unimplemented = 1;
  232. if (mshv_vsm_capabilities.intercept_page_available) {
  233. dev_dbg(dev, "using intercept page\n");
  234. config.intercept_page = 1;
  235. }
  236. reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
  237. reg_assoc.value.reg64 = config.as_uint64;
  238. return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
  239. 1, input_vtl_zero, &reg_assoc);
  240. }
  241. static void mshv_vtl_vmbus_isr(void)
  242. {
  243. struct hv_per_cpu_context *per_cpu;
  244. struct hv_message *msg;
  245. u32 message_type;
  246. union hv_synic_event_flags *event_flags;
  247. struct eventfd_ctx *eventfd;
  248. u16 i;
  249. per_cpu = this_cpu_ptr(hv_context.cpu_context);
  250. if (smp_processor_id() == 0) {
  251. msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
  252. message_type = READ_ONCE(msg->header.message_type);
  253. if (message_type != HVMSG_NONE)
  254. tasklet_schedule(&msg_dpc);
  255. }
  256. event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
  257. VTL2_VMBUS_SINT_INDEX;
  258. for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
  259. if (!sync_test_and_clear_bit(i, event_flags->flags))
  260. continue;
  261. rcu_read_lock();
  262. eventfd = READ_ONCE(flag_eventfds[i]);
  263. if (eventfd)
  264. eventfd_signal(eventfd);
  265. rcu_read_unlock();
  266. }
  267. vmbus_isr();
  268. }
  269. static int mshv_vtl_alloc_context(unsigned int cpu)
  270. {
  271. struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);
  272. per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  273. if (!per_cpu->run)
  274. return -ENOMEM;
  275. if (mshv_vsm_capabilities.intercept_page_available)
  276. mshv_vtl_configure_reg_page(per_cpu);
  277. mshv_vtl_synic_enable_regs(cpu);
  278. return 0;
  279. }
  280. static int mshv_vtl_cpuhp_online;
  281. static int hv_vtl_setup_synic(void)
  282. {
  283. int ret;
  284. /* Use our isr to first filter out packets destined for userspace */
  285. hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);
  286. ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
  287. mshv_vtl_alloc_context, NULL);
  288. if (ret < 0) {
  289. hv_setup_vmbus_handler(vmbus_isr);
  290. return ret;
  291. }
  292. mshv_vtl_cpuhp_online = ret;
  293. return 0;
  294. }
  295. static void hv_vtl_remove_synic(void)
  296. {
  297. cpuhp_remove_state(mshv_vtl_cpuhp_online);
  298. hv_setup_vmbus_handler(vmbus_isr);
  299. }
  300. static int vtl_get_vp_register(struct hv_register_assoc *reg)
  301. {
  302. return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
  303. 1, input_vtl_normal, reg);
  304. }
  305. static int vtl_set_vp_register(struct hv_register_assoc *reg)
  306. {
  307. return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
  308. 1, input_vtl_normal, reg);
  309. }
  310. static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
  311. {
  312. struct mshv_vtl_ram_disposition vtl0_mem;
  313. struct dev_pagemap *pgmap;
  314. void *addr;
  315. if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
  316. return -EFAULT;
  317. /* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */
  318. if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
  319. dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
  320. vtl0_mem.start_pfn, vtl0_mem.last_pfn);
  321. return -EFAULT;
  322. }
  323. pgmap = kzalloc_obj(*pgmap);
  324. if (!pgmap)
  325. return -ENOMEM;
  326. pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
  327. pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
  328. pgmap->nr_range = 1;
  329. pgmap->type = MEMORY_DEVICE_GENERIC;
  330. /*
  331. * Determine the highest page order that can be used for the given memory range.
  332. * This works best when the range is aligned; i.e. both the start and the length.
  333. */
  334. pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
  335. dev_dbg(vtl->module_dev,
  336. "Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
  337. vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);
  338. addr = devm_memremap_pages(mem_dev, pgmap);
  339. if (IS_ERR(addr)) {
  340. dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
  341. kfree(pgmap);
  342. return -EFAULT;
  343. }
  344. /* Don't free pgmap, since it has to stick around until the memory
  345. * is unmapped, which will never happen as there is no scenario
  346. * where VTL0 can be released/shutdown without bringing down VTL2.
  347. */
  348. return 0;
  349. }
  350. static void mshv_vtl_cancel(int cpu)
  351. {
  352. int here = get_cpu();
  353. if (here != cpu) {
  354. if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
  355. smp_send_reschedule(cpu);
  356. } else {
  357. WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
  358. }
  359. put_cpu();
  360. }
  361. static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
  362. {
  363. struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);
  364. mshv_vtl_cancel(poll_file->cpu);
  365. return 0;
  366. }
  367. static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
  368. {
  369. struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);
  370. WARN_ON(poll_file->wqh);
  371. poll_file->wqh = wqh;
  372. add_wait_queue(wqh, &poll_file->wait);
  373. }
  374. static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
  375. {
  376. struct file *file, *old_file;
  377. struct mshv_vtl_poll_file *poll_file;
  378. struct mshv_vtl_set_poll_file input;
  379. if (copy_from_user(&input, user_input, sizeof(input)))
  380. return -EFAULT;
  381. if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
  382. return -EINVAL;
  383. /*
  384. * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
  385. * CPU is expected to remain online after above cpu_online() check.
  386. */
  387. file = NULL;
  388. file = fget(input.fd);
  389. if (!file)
  390. return -EBADFD;
  391. poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
  392. if (!poll_file)
  393. return -EINVAL;
  394. mutex_lock(&mshv_vtl_poll_file_lock);
  395. if (poll_file->wqh)
  396. remove_wait_queue(poll_file->wqh, &poll_file->wait);
  397. poll_file->wqh = NULL;
  398. old_file = poll_file->file;
  399. poll_file->file = file;
  400. poll_file->cpu = input.cpu;
  401. if (file) {
  402. init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
  403. init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
  404. vfs_poll(file, &poll_file->pt);
  405. }
  406. mutex_unlock(&mshv_vtl_poll_file_lock);
  407. if (old_file)
  408. fput(old_file);
  409. return 0;
  410. }
  411. /* Static table mapping register names to their corresponding actions */
  412. static const struct {
  413. enum hv_register_name reg_name;
  414. int debug_reg_num; /* -1 if not a debug register */
  415. u32 msr_addr; /* 0 if not an MSR */
  416. } reg_table[] = {
  417. /* Debug registers */
  418. {HV_X64_REGISTER_DR0, 0, 0},
  419. {HV_X64_REGISTER_DR1, 1, 0},
  420. {HV_X64_REGISTER_DR2, 2, 0},
  421. {HV_X64_REGISTER_DR3, 3, 0},
  422. {HV_X64_REGISTER_DR6, 6, 0},
  423. /* MTRR MSRs */
  424. {HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
  425. {HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
  426. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
  427. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
  428. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
  429. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
  430. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
  431. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
  432. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
  433. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
  434. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
  435. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
  436. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
  437. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
  438. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
  439. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
  440. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
  441. {HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
  442. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
  443. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
  444. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
  445. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
  446. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
  447. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
  448. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
  449. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
  450. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
  451. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
  452. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
  453. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
  454. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
  455. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
  456. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
  457. {HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
  458. {HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
  459. {HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
  460. {HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
  461. {HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
  462. {HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
  463. {HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
  464. {HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
  465. {HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
  466. {HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
  467. {HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
  468. {HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
  469. };
  470. static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
  471. {
  472. u64 *reg64;
  473. enum hv_register_name gpr_name;
  474. int i;
  475. gpr_name = regs->name;
  476. reg64 = &regs->value.reg64;
  477. /* Search for the register in the table */
  478. for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
  479. if (reg_table[i].reg_name != gpr_name)
  480. continue;
  481. if (reg_table[i].debug_reg_num != -1) {
  482. /* Handle debug registers */
  483. if (gpr_name == HV_X64_REGISTER_DR6 &&
  484. !mshv_vsm_capabilities.dr6_shared)
  485. goto hypercall;
  486. if (set)
  487. native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
  488. else
  489. *reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
  490. } else {
  491. /* Handle MSRs */
  492. if (set)
  493. wrmsrl(reg_table[i].msr_addr, *reg64);
  494. else
  495. rdmsrl(reg_table[i].msr_addr, *reg64);
  496. }
  497. return 0;
  498. }
  499. hypercall:
  500. return 1;
  501. }
  502. static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
  503. {
  504. struct hv_vp_assist_page *hvp;
  505. hvp = hv_vp_assist_page[smp_processor_id()];
  506. /*
  507. * Process signal event direct set in the run page, if any.
  508. */
  509. if (mshv_vsm_capabilities.return_action_available) {
  510. u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);
  511. WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);
  512. /*
  513. * Hypervisor will take care of clearing out the actions
  514. * set in the assist page.
  515. */
  516. memcpy(hvp->vtl_ret_actions,
  517. mshv_vtl_this_run()->vtl_ret_actions,
  518. min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
  519. }
  520. mshv_vtl_return_call(vtl0);
  521. }
  522. static bool mshv_vtl_process_intercept(void)
  523. {
  524. struct hv_per_cpu_context *mshv_cpu;
  525. void *synic_message_page;
  526. struct hv_message *msg;
  527. u32 message_type;
  528. mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
  529. synic_message_page = mshv_cpu->hyp_synic_message_page;
  530. if (unlikely(!synic_message_page))
  531. return true;
  532. msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
  533. message_type = READ_ONCE(msg->header.message_type);
  534. if (message_type == HVMSG_NONE)
  535. return true;
  536. memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
  537. vmbus_signal_eom(msg, message_type);
  538. return false;
  539. }
  540. static int mshv_vtl_ioctl_return_to_lower_vtl(void)
  541. {
  542. preempt_disable();
  543. for (;;) {
  544. unsigned long irq_flags;
  545. struct hv_vp_assist_page *hvp;
  546. int ret;
  547. if (__xfer_to_guest_mode_work_pending()) {
  548. preempt_enable();
  549. ret = xfer_to_guest_mode_handle_work();
  550. if (ret)
  551. return ret;
  552. preempt_disable();
  553. }
  554. local_irq_save(irq_flags);
  555. if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
  556. local_irq_restore(irq_flags);
  557. preempt_enable();
  558. return -EINTR;
  559. }
  560. mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
  561. local_irq_restore(irq_flags);
  562. hvp = hv_vp_assist_page[smp_processor_id()];
  563. this_cpu_inc(num_vtl0_transitions);
  564. switch (hvp->vtl_entry_reason) {
  565. case MSHV_ENTRY_REASON_INTERRUPT:
  566. if (!mshv_vsm_capabilities.intercept_page_available &&
  567. likely(!mshv_vtl_process_intercept()))
  568. goto done;
  569. break;
  570. case MSHV_ENTRY_REASON_INTERCEPT:
  571. WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
  572. memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
  573. sizeof(hvp->intercept_message));
  574. goto done;
  575. default:
  576. panic("unknown entry reason: %d", hvp->vtl_entry_reason);
  577. }
  578. }
  579. done:
  580. preempt_enable();
  581. return 0;
  582. }
  583. static long
  584. mshv_vtl_ioctl_get_regs(void __user *user_args)
  585. {
  586. struct mshv_vp_registers args;
  587. struct hv_register_assoc reg;
  588. long ret;
  589. if (copy_from_user(&args, user_args, sizeof(args)))
  590. return -EFAULT;
  591. /* This IOCTL supports processing only one register at a time. */
  592. if (args.count != 1)
  593. return -EINVAL;
  594. if (copy_from_user(&reg, (void __user *)args.regs_ptr,
  595. sizeof(reg)))
  596. return -EFAULT;
  597. ret = mshv_vtl_get_set_reg(&reg, false);
  598. if (!ret)
  599. goto copy_args; /* No need of hypercall */
  600. ret = vtl_get_vp_register(&reg);
  601. if (ret)
  602. return ret;
  603. copy_args:
  604. if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
  605. ret = -EFAULT;
  606. return ret;
  607. }
  608. static long
  609. mshv_vtl_ioctl_set_regs(void __user *user_args)
  610. {
  611. struct mshv_vp_registers args;
  612. struct hv_register_assoc reg;
  613. long ret;
  614. if (copy_from_user(&args, user_args, sizeof(args)))
  615. return -EFAULT;
  616. /* This IOCTL supports processing only one register at a time. */
  617. if (args.count != 1)
  618. return -EINVAL;
  619. if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
  620. return -EFAULT;
  621. ret = mshv_vtl_get_set_reg(&reg, true);
  622. if (!ret)
  623. return ret; /* No need of hypercall */
  624. ret = vtl_set_vp_register(&reg);
  625. return ret;
  626. }
  627. static long
  628. mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
  629. {
  630. long ret;
  631. struct mshv_vtl *vtl = filp->private_data;
  632. switch (ioctl) {
  633. case MSHV_SET_POLL_FILE:
  634. ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
  635. break;
  636. case MSHV_GET_VP_REGISTERS:
  637. ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
  638. break;
  639. case MSHV_SET_VP_REGISTERS:
  640. ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
  641. break;
  642. case MSHV_RETURN_TO_LOWER_VTL:
  643. ret = mshv_vtl_ioctl_return_to_lower_vtl();
  644. break;
  645. case MSHV_ADD_VTL0_MEMORY:
  646. ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
  647. break;
  648. default:
  649. dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
  650. ret = -ENOTTY;
  651. }
  652. return ret;
  653. }
  654. static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
  655. {
  656. struct page *page;
  657. int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
  658. int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;
  659. if (!cpu_online(cpu))
  660. return VM_FAULT_SIGBUS;
  661. /*
  662. * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
  663. * CPU is expected to remain online after above cpu_online() check.
  664. */
  665. if (real_off == MSHV_RUN_PAGE_OFFSET) {
  666. page = virt_to_page(mshv_vtl_cpu_run(cpu));
  667. } else if (real_off == MSHV_REG_PAGE_OFFSET) {
  668. if (!mshv_has_reg_page)
  669. return VM_FAULT_SIGBUS;
  670. page = mshv_vtl_cpu_reg_page(cpu);
  671. } else {
  672. return VM_FAULT_NOPAGE;
  673. }
  674. get_page(page);
  675. vmf->page = page;
  676. return 0;
  677. }
  678. static const struct vm_operations_struct mshv_vtl_vm_ops = {
  679. .fault = mshv_vtl_fault,
  680. };
  681. static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
  682. {
  683. vma->vm_ops = &mshv_vtl_vm_ops;
  684. return 0;
  685. }
  686. static int mshv_vtl_release(struct inode *inode, struct file *filp)
  687. {
  688. struct mshv_vtl *vtl = filp->private_data;
  689. kfree(vtl);
  690. return 0;
  691. }
  692. static const struct file_operations mshv_vtl_fops = {
  693. .owner = THIS_MODULE,
  694. .unlocked_ioctl = mshv_vtl_ioctl,
  695. .release = mshv_vtl_release,
  696. .mmap = mshv_vtl_mmap,
  697. };
  698. static void mshv_vtl_synic_mask_vmbus_sint(void *info)
  699. {
  700. union hv_synic_sint sint;
  701. const u8 *mask = info;
  702. sint.as_uint64 = 0;
  703. sint.vector = HYPERVISOR_CALLBACK_VECTOR;
  704. sint.masked = (*mask != 0);
  705. sint.auto_eoi = hv_recommend_using_aeoi();
  706. hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
  707. sint.as_uint64);
  708. if (!sint.masked)
  709. pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
  710. else
  711. pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
  712. }
  713. static void mshv_vtl_read_remote(void *buffer)
  714. {
  715. struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
  716. struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
  717. VTL2_VMBUS_SINT_INDEX;
  718. u32 message_type = READ_ONCE(msg->header.message_type);
  719. WRITE_ONCE(has_message, false);
  720. if (message_type == HVMSG_NONE)
  721. return;
  722. memcpy(buffer, msg, sizeof(*msg));
  723. vmbus_signal_eom(msg, message_type);
  724. }
/*
 * True while the VTL2 VMBus SINT is masked. Written by
 * mshv_vtl_sint_ioctl_pause_msg_stream(); read() reports EOF and poll()
 * reports readable while it is set. Starts out masked.
 */
static bool vtl_synic_mask_vmbus_sint_masked = true;
  726. static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
  727. {
  728. struct hv_message msg = {};
  729. int ret;
  730. if (size < sizeof(msg))
  731. return -EINVAL;
  732. for (;;) {
  733. smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
  734. if (msg.header.message_type != HVMSG_NONE)
  735. break;
  736. if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
  737. return 0; /* EOF */
  738. if (filp->f_flags & O_NONBLOCK)
  739. return -EAGAIN;
  740. ret = wait_event_interruptible(fd_wait_queue,
  741. READ_ONCE(has_message) ||
  742. READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
  743. if (ret)
  744. return ret;
  745. }
  746. if (copy_to_user(arg, &msg, sizeof(msg)))
  747. return -EFAULT;
  748. return sizeof(msg);
  749. }
  750. static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
  751. {
  752. __poll_t mask = 0;
  753. poll_wait(filp, &fd_wait_queue, wait);
  754. if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
  755. mask |= EPOLLIN | EPOLLRDNORM;
  756. return mask;
  757. }
/*
 * Tasklet body (registered for msg_dpc in mshv_vtl_init()): flag that a
 * message is pending and wake readers/pollers sleeping on fd_wait_queue.
 * @data is unused.
 */
static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	/* Set the flag before waking so woken waiters observe it. */
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}
  763. static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
  764. {
  765. struct mshv_vtl_sint_post_msg message;
  766. u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];
  767. if (copy_from_user(&message, arg, sizeof(message)))
  768. return -EFAULT;
  769. if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
  770. return -EINVAL;
  771. if (copy_from_user(payload, (void __user *)message.payload_ptr,
  772. message.payload_size))
  773. return -EFAULT;
  774. return hv_post_message((union hv_connection_id)message.connection_id,
  775. message.message_type, (void *)payload,
  776. message.payload_size);
  777. }
  778. static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
  779. {
  780. u64 input, status;
  781. struct mshv_vtl_signal_event signal_event;
  782. if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
  783. return -EFAULT;
  784. input = signal_event.connection_id | ((u64)signal_event.flag << 32);
  785. status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);
  786. return hv_result_to_errno(status);
  787. }
  788. static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
  789. {
  790. struct mshv_vtl_set_eventfd set_eventfd;
  791. struct eventfd_ctx *eventfd, *old_eventfd;
  792. if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
  793. return -EFAULT;
  794. if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
  795. return -EINVAL;
  796. eventfd = NULL;
  797. if (set_eventfd.fd >= 0) {
  798. eventfd = eventfd_ctx_fdget(set_eventfd.fd);
  799. if (IS_ERR(eventfd))
  800. return PTR_ERR(eventfd);
  801. }
  802. guard(mutex)(&flag_lock);
  803. old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
  804. WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);
  805. if (old_eventfd) {
  806. synchronize_rcu();
  807. eventfd_ctx_put(old_eventfd);
  808. }
  809. return 0;
  810. }
  811. static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
  812. {
  813. static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
  814. struct mshv_sint_mask mask;
  815. if (copy_from_user(&mask, arg, sizeof(mask)))
  816. return -EFAULT;
  817. guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
  818. on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
  819. WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
  820. if (mask.mask)
  821. wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
  822. return 0;
  823. }
  824. static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
  825. {
  826. switch (cmd) {
  827. case MSHV_SINT_POST_MESSAGE:
  828. return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
  829. case MSHV_SINT_SIGNAL_EVENT:
  830. return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
  831. case MSHV_SINT_SET_EVENTFD:
  832. return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
  833. case MSHV_SINT_PAUSE_MESSAGE_STREAM:
  834. return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
  835. default:
  836. return -ENOIOCTLCMD;
  837. }
  838. }
  839. static const struct file_operations mshv_vtl_sint_ops = {
  840. .owner = THIS_MODULE,
  841. .read = mshv_vtl_sint_read,
  842. .poll = mshv_vtl_sint_poll,
  843. .unlocked_ioctl = mshv_vtl_sint_ioctl,
  844. };
  845. static struct miscdevice mshv_vtl_sint_dev = {
  846. .name = "mshv_sint",
  847. .fops = &mshv_vtl_sint_ops,
  848. .mode = 0600,
  849. .minor = MISC_DYNAMIC_MINOR,
  850. };
  851. static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
  852. {
  853. struct miscdevice *dev = f->private_data;
  854. struct mshv_vtl_hvcall_fd *fd;
  855. if (!capable(CAP_SYS_ADMIN))
  856. return -EPERM;
  857. fd = vzalloc(sizeof(*fd));
  858. if (!fd)
  859. return -ENOMEM;
  860. fd->dev = dev;
  861. f->private_data = fd;
  862. mutex_init(&fd->init_mutex);
  863. return 0;
  864. }
  865. static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
  866. {
  867. struct mshv_vtl_hvcall_fd *fd;
  868. fd = f->private_data;
  869. if (fd) {
  870. vfree(fd);
  871. f->private_data = NULL;
  872. }
  873. return 0;
  874. }
  875. static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
  876. struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
  877. {
  878. struct mshv_vtl_hvcall_setup hvcall_setup;
  879. guard(mutex)(&fd->init_mutex);
  880. if (fd->allow_map_initialized) {
  881. dev_err(fd->dev->this_device,
  882. "Hypercall allow map has already been set, pid %d\n",
  883. current->pid);
  884. return -EINVAL;
  885. }
  886. if (copy_from_user(&hvcall_setup, hvcall_setup_user,
  887. sizeof(struct mshv_vtl_hvcall_setup))) {
  888. return -EFAULT;
  889. }
  890. if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
  891. return -EINVAL;
  892. if (copy_from_user(&fd->allow_bitmap,
  893. (void __user *)hvcall_setup.allow_bitmap_ptr,
  894. hvcall_setup.bitmap_array_size)) {
  895. return -EFAULT;
  896. }
  897. dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
  898. current->pid);
  899. fd->allow_map_initialized = true;
  900. return 0;
  901. }
  902. static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
  903. {
  904. return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
  905. }
  906. static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
  907. struct mshv_vtl_hvcall __user *hvcall_user)
  908. {
  909. struct mshv_vtl_hvcall hvcall;
  910. void *in, *out;
  911. int ret;
  912. if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
  913. return -EFAULT;
  914. if (hvcall.input_size > HV_HYP_PAGE_SIZE)
  915. return -EINVAL;
  916. if (hvcall.output_size > HV_HYP_PAGE_SIZE)
  917. return -EINVAL;
  918. /*
  919. * By default, all hypercalls are not allowed.
  920. * The user mode code has to set up the allow bitmap once.
  921. */
  922. if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
  923. dev_err(fd->dev->this_device,
  924. "Hypercall with control data %#llx isn't allowed\n",
  925. hvcall.control);
  926. return -EPERM;
  927. }
  928. /*
  929. * This may create a problem for Confidential VM (CVM) usecase where we need to use
  930. * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
  931. * hyperv_pcpu_output_arg) for making a hypervisor call.
  932. *
  933. * TODO: Take care of this when CVM support is added.
  934. */
  935. in = (void *)__get_free_page(GFP_KERNEL);
  936. out = (void *)__get_free_page(GFP_KERNEL);
  937. if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
  938. ret = -EFAULT;
  939. goto free_pages;
  940. }
  941. hvcall.status = hv_do_hypercall(hvcall.control, in, out);
  942. if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
  943. ret = -EFAULT;
  944. goto free_pages;
  945. }
  946. ret = put_user(hvcall.status, &hvcall_user->status);
  947. free_pages:
  948. free_page((unsigned long)in);
  949. free_page((unsigned long)out);
  950. return ret;
  951. }
  952. static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
  953. {
  954. struct mshv_vtl_hvcall_fd *fd = f->private_data;
  955. switch (cmd) {
  956. case MSHV_HVCALL_SETUP:
  957. return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
  958. case MSHV_HVCALL:
  959. return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
  960. default:
  961. break;
  962. }
  963. return -ENOIOCTLCMD;
  964. }
  965. static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
  966. .owner = THIS_MODULE,
  967. .open = mshv_vtl_hvcall_dev_open,
  968. .release = mshv_vtl_hvcall_dev_release,
  969. .unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
  970. };
  971. static struct miscdevice mshv_vtl_hvcall_dev = {
  972. .name = "mshv_hvcall",
  973. .nodename = "mshv_hvcall",
  974. .fops = &mshv_vtl_hvcall_dev_file_ops,
  975. .mode = 0600,
  976. .minor = MISC_DYNAMIC_MINOR,
  977. };
  978. static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
  979. {
  980. pid_t pid = task_pid_vnr(current);
  981. uid_t uid = current_uid().val;
  982. int ret = 0;
  983. pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);
  984. if (capable(CAP_SYS_ADMIN)) {
  985. filp->private_data = inodep;
  986. } else {
  987. pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
  988. __func__, pid, uid);
  989. ret = -EPERM;
  990. }
  991. return ret;
  992. }
  993. static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
  994. {
  995. unsigned long mask = size - 1;
  996. unsigned long start = vmf->address & ~mask;
  997. unsigned long end = start + size;
  998. bool is_valid;
  999. is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
  1000. start >= vmf->vma->vm_start &&
  1001. end <= vmf->vma->vm_end;
  1002. if (is_valid)
  1003. *pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);
  1004. return is_valid;
  1005. }
  1006. static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
  1007. {
  1008. unsigned long pfn = vmf->pgoff;
  1009. vm_fault_t ret = VM_FAULT_FALLBACK;
  1010. switch (order) {
  1011. case 0:
  1012. return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
  1013. case PMD_ORDER:
  1014. if (can_fault(vmf, PMD_SIZE, &pfn))
  1015. ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
  1016. return ret;
  1017. case PUD_ORDER:
  1018. if (can_fault(vmf, PUD_SIZE, &pfn))
  1019. ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
  1020. return ret;
  1021. default:
  1022. return VM_FAULT_SIGBUS;
  1023. }
  1024. }
/* Base-page fault handler: delegates to the huge-fault handler with order 0. */
static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}
  1029. static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
  1030. .fault = mshv_vtl_low_fault,
  1031. .huge_fault = mshv_vtl_low_huge_fault,
  1032. };
  1033. static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
  1034. {
  1035. vma->vm_ops = &mshv_vtl_low_vm_ops;
  1036. vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);
  1037. return 0;
  1038. }
  1039. static const struct file_operations mshv_vtl_low_file_ops = {
  1040. .owner = THIS_MODULE,
  1041. .open = mshv_vtl_low_open,
  1042. .mmap = mshv_vtl_low_mmap,
  1043. };
  1044. static struct miscdevice mshv_vtl_low = {
  1045. .name = "mshv_vtl_low",
  1046. .nodename = "mshv_vtl_low",
  1047. .fops = &mshv_vtl_low_file_ops,
  1048. .mode = 0600,
  1049. .minor = MISC_DYNAMIC_MINOR,
  1050. };
  1051. static int __init mshv_vtl_init(void)
  1052. {
  1053. int ret;
  1054. struct device *dev = mshv_dev.this_device;
  1055. /*
  1056. * This creates /dev/mshv which provides functionality to create VTLs and partitions.
  1057. */
  1058. ret = misc_register(&mshv_dev);
  1059. if (ret) {
  1060. dev_err(dev, "mshv device register failed: %d\n", ret);
  1061. goto free_dev;
  1062. }
  1063. tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
  1064. init_waitqueue_head(&fd_wait_queue);
  1065. if (mshv_vtl_get_vsm_regs()) {
  1066. dev_emerg(dev, "Unable to get VSM capabilities !!\n");
  1067. ret = -ENODEV;
  1068. goto free_dev;
  1069. }
  1070. if (mshv_vtl_configure_vsm_partition(dev)) {
  1071. dev_emerg(dev, "VSM configuration failed !!\n");
  1072. ret = -ENODEV;
  1073. goto free_dev;
  1074. }
  1075. mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
  1076. ret = hv_vtl_setup_synic();
  1077. if (ret)
  1078. goto free_dev;
  1079. /*
  1080. * mshv_sint device adds VMBus relay ioctl support.
  1081. * This provides a channel for VTL0 to communicate with VTL2.
  1082. */
  1083. ret = misc_register(&mshv_vtl_sint_dev);
  1084. if (ret)
  1085. goto free_synic;
  1086. /*
  1087. * mshv_hvcall device adds interface to enable userspace for direct hypercalls support.
  1088. */
  1089. ret = misc_register(&mshv_vtl_hvcall_dev);
  1090. if (ret)
  1091. goto free_sint;
  1092. /*
  1093. * mshv_vtl_low device is used to map VTL0 address space to a user-mode process in VTL2.
  1094. * It implements mmap() to allow a user-mode process in VTL2 to map to the address of VTL0.
  1095. */
  1096. ret = misc_register(&mshv_vtl_low);
  1097. if (ret)
  1098. goto free_hvcall;
  1099. /*
  1100. * "mshv vtl mem dev" device is later used to setup VTL0 memory.
  1101. */
  1102. mem_dev = kzalloc_obj(*mem_dev);
  1103. if (!mem_dev) {
  1104. ret = -ENOMEM;
  1105. goto free_low;
  1106. }
  1107. mutex_init(&mshv_vtl_poll_file_lock);
  1108. device_initialize(mem_dev);
  1109. dev_set_name(mem_dev, "mshv vtl mem dev");
  1110. ret = device_add(mem_dev);
  1111. if (ret) {
  1112. dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
  1113. goto free_mem;
  1114. }
  1115. return 0;
  1116. free_mem:
  1117. kfree(mem_dev);
  1118. free_low:
  1119. misc_deregister(&mshv_vtl_low);
  1120. free_hvcall:
  1121. misc_deregister(&mshv_vtl_hvcall_dev);
  1122. free_sint:
  1123. misc_deregister(&mshv_vtl_sint_dev);
  1124. free_synic:
  1125. hv_vtl_remove_synic();
  1126. free_dev:
  1127. misc_deregister(&mshv_dev);
  1128. return ret;
  1129. }
/*
 * Module exit: undo mshv_vtl_init() in reverse order - remove and free the
 * memory device, deregister the three auxiliary misc devices, remove the
 * SynIC setup and finally deregister the main mshv device.
 */
static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	/*
	 * NOTE(review): mem_dev went through device_initialize(); the driver
	 * core normally expects put_device() instead of kfree() - confirm.
	 */
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}
/* Register the module's load and unload entry points. */
module_init(mshv_vtl_init);
module_exit(mshv_vtl_exit);