vfio_user.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2025 Ant Group
  4. * Author: Tiwei Bie <tiwei.btw@antgroup.com>
  5. */
  6. #include <errno.h>
  7. #include <fcntl.h>
  8. #include <unistd.h>
  9. #include <stdio.h>
  10. #include <stdint.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include <sys/ioctl.h>
  14. #include <sys/eventfd.h>
  15. #include <linux/limits.h>
  16. #include <linux/vfio.h>
  17. #include <linux/pci_regs.h>
  18. #include <as-layout.h>
  19. #include <um_malloc.h>
  20. #include "vfio_user.h"
  21. int uml_vfio_user_open_container(void)
  22. {
  23. int r, fd;
  24. fd = open("/dev/vfio/vfio", O_RDWR);
  25. if (fd < 0)
  26. return -errno;
  27. r = ioctl(fd, VFIO_GET_API_VERSION);
  28. if (r != VFIO_API_VERSION) {
  29. r = r < 0 ? -errno : -EINVAL;
  30. goto error;
  31. }
  32. r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
  33. if (r <= 0) {
  34. r = r < 0 ? -errno : -EINVAL;
  35. goto error;
  36. }
  37. return fd;
  38. error:
  39. close(fd);
  40. return r;
  41. }
  42. int uml_vfio_user_setup_iommu(int container)
  43. {
  44. /*
  45. * This is a bit tricky. See the big comment in
  46. * vhost_user_set_mem_table() in virtio_uml.c.
  47. */
  48. unsigned long reserved = uml_reserved - uml_physmem;
  49. struct vfio_iommu_type1_dma_map dma_map = {
  50. .argsz = sizeof(dma_map),
  51. .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
  52. .vaddr = uml_reserved,
  53. .iova = reserved,
  54. .size = physmem_size - reserved,
  55. };
  56. if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
  57. return -errno;
  58. if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
  59. return -errno;
  60. return 0;
  61. }
  62. int uml_vfio_user_get_group_id(const char *device)
  63. {
  64. char *path, *buf, *end;
  65. const char *name;
  66. int r;
  67. path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
  68. if (!path)
  69. return -ENOMEM;
  70. sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
  71. buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
  72. if (!buf) {
  73. r = -ENOMEM;
  74. goto free_path;
  75. }
  76. r = readlink(path, buf, PATH_MAX);
  77. if (r < 0) {
  78. r = -errno;
  79. goto free_buf;
  80. }
  81. buf[r] = '\0';
  82. name = basename(buf);
  83. r = strtoul(name, &end, 10);
  84. if (*end != '\0' || end == name) {
  85. r = -EINVAL;
  86. goto free_buf;
  87. }
  88. free_buf:
  89. kfree(buf);
  90. free_path:
  91. kfree(path);
  92. return r;
  93. }
  94. int uml_vfio_user_open_group(int group_id)
  95. {
  96. char *path;
  97. int fd;
  98. path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
  99. if (!path)
  100. return -ENOMEM;
  101. sprintf(path, "/dev/vfio/%d", group_id);
  102. fd = open(path, O_RDWR);
  103. if (fd < 0) {
  104. fd = -errno;
  105. goto out;
  106. }
  107. out:
  108. kfree(path);
  109. return fd;
  110. }
  111. int uml_vfio_user_set_container(int container, int group)
  112. {
  113. if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
  114. return -errno;
  115. return 0;
  116. }
  117. int uml_vfio_user_unset_container(int container, int group)
  118. {
  119. if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
  120. return -errno;
  121. return 0;
  122. }
  123. static int vfio_set_irqs(int device, int start, int count, int *irqfd)
  124. {
  125. struct vfio_irq_set *irq_set;
  126. int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
  127. int err = 0;
  128. irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
  129. if (!irq_set)
  130. return -ENOMEM;
  131. irq_set->argsz = argsz;
  132. irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
  133. irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
  134. irq_set->start = start;
  135. irq_set->count = count;
  136. memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
  137. if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
  138. err = -errno;
  139. goto out;
  140. }
  141. out:
  142. kfree(irq_set);
  143. return err;
  144. }
  145. int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
  146. int group, const char *device)
  147. {
  148. struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
  149. struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
  150. int err, i;
  151. dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
  152. if (dev->device < 0)
  153. return -errno;
  154. if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
  155. err = -errno;
  156. goto close_device;
  157. }
  158. dev->num_regions = device_info.num_regions;
  159. if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
  160. dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
  161. dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
  162. UM_GFP_KERNEL);
  163. if (!dev->region) {
  164. err = -ENOMEM;
  165. goto close_device;
  166. }
  167. for (i = 0; i < dev->num_regions; i++) {
  168. struct vfio_region_info region = {
  169. .argsz = sizeof(region),
  170. .index = i,
  171. };
  172. if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, &region) < 0) {
  173. err = -errno;
  174. goto free_region;
  175. }
  176. dev->region[i].size = region.size;
  177. dev->region[i].offset = region.offset;
  178. }
  179. /* Only MSI-X is supported currently. */
  180. irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
  181. if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
  182. err = -errno;
  183. goto free_region;
  184. }
  185. dev->irq_count = irq_info.count;
  186. dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
  187. if (!dev->irqfd) {
  188. err = -ENOMEM;
  189. goto free_region;
  190. }
  191. memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
  192. err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
  193. if (err)
  194. goto free_irqfd;
  195. return 0;
  196. free_irqfd:
  197. kfree(dev->irqfd);
  198. free_region:
  199. kfree(dev->region);
  200. close_device:
  201. close(dev->device);
  202. return err;
  203. }
  204. void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
  205. {
  206. kfree(dev->irqfd);
  207. kfree(dev->region);
  208. close(dev->device);
  209. }
  210. int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
  211. {
  212. int irqfd;
  213. irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
  214. if (irqfd < 0)
  215. return -errno;
  216. dev->irqfd[index] = irqfd;
  217. return irqfd;
  218. }
  219. void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
  220. {
  221. close(dev->irqfd[index]);
  222. dev->irqfd[index] = -1;
  223. }
  224. int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
  225. {
  226. return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
  227. }
  228. static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
  229. uint64_t offset, void *buf, uint64_t size)
  230. {
  231. if (index >= dev->num_regions || offset + size > dev->region[index].size)
  232. return -EINVAL;
  233. if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
  234. return -errno;
  235. return 0;
  236. }
  237. static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
  238. uint64_t offset, const void *buf, uint64_t size)
  239. {
  240. if (index >= dev->num_regions || offset + size > dev->region[index].size)
  241. return -EINVAL;
  242. if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
  243. return -errno;
  244. return 0;
  245. }
  246. int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
  247. unsigned int offset, void *buf, int size)
  248. {
  249. return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
  250. offset, buf, size);
  251. }
  252. int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
  253. unsigned int offset, const void *buf, int size)
  254. {
  255. return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
  256. offset, buf, size);
  257. }
  258. int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
  259. unsigned int offset, void *buf, int size)
  260. {
  261. return vfio_region_read(dev, bar, offset, buf, size);
  262. }
  263. int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
  264. unsigned int offset, const void *buf, int size)
  265. {
  266. return vfio_region_write(dev, bar, offset, buf, size);
  267. }