vduse_dev.c 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * VDUSE: vDPA Device in Userspace
  4. *
  5. * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
  6. *
  7. * Author: Xie Yongji <xieyongji@bytedance.com>
  8. *
  9. */
  10. #include "linux/virtio_net.h"
  11. #include <linux/cleanup.h>
  12. #include <linux/init.h>
  13. #include <linux/module.h>
  14. #include <linux/cdev.h>
  15. #include <linux/device.h>
  16. #include <linux/eventfd.h>
  17. #include <linux/slab.h>
  18. #include <linux/wait.h>
  19. #include <linux/dma-map-ops.h>
  20. #include <linux/poll.h>
  21. #include <linux/file.h>
  22. #include <linux/uio.h>
  23. #include <linux/vdpa.h>
  24. #include <linux/nospec.h>
  25. #include <linux/virtio.h>
  26. #include <linux/vmalloc.h>
  27. #include <linux/sched/mm.h>
  28. #include <uapi/linux/vduse.h>
  29. #include <uapi/linux/vdpa.h>
  30. #include <uapi/linux/virtio_config.h>
  31. #include <uapi/linux/virtio_ids.h>
  32. #include <uapi/linux/virtio_blk.h>
  33. #include <uapi/linux/virtio_ring.h>
  34. #include <linux/mod_devicetable.h>
  35. #include "iova_domain.h"
/* Module identification strings. */
#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"
/* One device per minor number: 2^MINORBITS. */
#define VDUSE_DEV_MAX (1U << MINORBITS)
/* Upper bounds on virtqueue groups and address spaces (users not in view). */
#define VDUSE_DEV_MAX_GROUPS 0xffff
#define VDUSE_DEV_MAX_AS 0xffff
/* Bounce buffer sizes in bytes: 1 GiB, 1 MiB and 64 MiB respectively. */
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
/*
 * NOTE(review): presumably the initial value of vduse_dev::msg_timeout, which
 * vduse_dev_msg_sync() multiplies by HZ (i.e. seconds) - confirm at the
 * assignment site.
 */
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
/* NOTE(review): looks like a sentinel for "no effective IRQ CPU" - confirm. */
#define IRQ_UNBOUND -1
/*
 * The VDUSE instance has not asked for the VDUSE API version, so assume 0.
 *
 * Old devices may never ask for the API version and assume it is 0. Keep
 * this value for them. From the moment the VDUSE instance asks for the
 * version, convert to the latest supported one and continue the regular flow.
 */
#define VDUSE_API_VERSION_NOT_ASKED U64_MAX
/* Per-virtqueue state kept by the VDUSE driver. */
struct vduse_virtqueue {
	u16 index;		/* sent as vq_state.index in VDUSE_GET_VQ_STATE */
	u16 num_max;		/* reported as the size while num is 0 */
	u32 num;		/* set by set_vq_num(), cleared on reset */
	u64 desc_addr;		/* the three ring areas from set_vq_address() */
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;	/* last state from set_vq_state() */
	bool ready;		/* set by set_vq_ready(); kicks are dropped if false */
	bool kicked;		/* a kick arrived while no kickfd was installed */
	u32 group;		/* returned by vduse_get_vq_group() for API >= 1 */
	spinlock_t kick_lock;	/* held around ready/kickfd/kicked in the kick path */
	spinlock_t irq_lock;	/* held while cb is updated */
	struct eventfd_ctx *kickfd;	/* signalled on kick when non-NULL */
	struct vdpa_callback cb;	/* installed by set_vq_cb() */
	struct work_struct inject;	/* flushed on reset; handler not in view */
	struct work_struct kick;	/* runs vduse_vq_kick_work() */
	int irq_effective_cpu;
	struct cpumask irq_affinity;	/* set/queried via the vq affinity ops */
	struct kobject kobj;
};
struct vduse_dev;
/*
 * Wrapper embedding the vDPA device; vdpa_to_vduse() uses container_of() on
 * the vdpa member to get back to the owning VDUSE device.
 */
struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;	/* back-pointer returned by vdpa_to_vduse() */
};
/*
 * Description of a userspace memory region attached to an address space.
 * NOTE(review): field meanings are inferred from names only; the code that
 * fills this in is not in view - confirm there.
 */
struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};
/* One address space of a VDUSE device; vduse_dev::as is indexed by ASID. */
struct vduse_as {
	struct vduse_iova_domain *domain;	/* may be NULL (checked on reset) */
	struct vduse_umem *umem;
	struct mutex mem_lock;	/* NOTE(review): presumably guards umem - confirm */
};
/* A virtqueue group and the address space it is currently bound to. */
struct vduse_vq_group {
	rwlock_t as_lock;	/* only taken when dev->nas > 1 (see the guards) */
	struct vduse_as *as; /* Protected by as_lock */
	struct vduse_dev *dev;	/* owning device; read by the lock guards */
};
/* State of one VDUSE device, shared by the char device and vDPA sides. */
struct vduse_dev {
	struct vduse_vdpa *vdev;	/* cleared by vduse_vdpa_free() */
	struct device *dev;
	struct vduse_virtqueue **vqs;	/* vq_num entries, indexed by vq index */
	struct vduse_as *as;		/* nas entries, indexed by ASID */
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;	/* held around send_list/recv_list/msg_unique */
	u64 msg_unique;		/* next request_id handed out */
	u32 msg_timeout;	/* multiplied by HZ when waiting; 0 = no timeout */
	wait_queue_head_t waitq;	/* woken on new request or device broken */
	struct list_head send_list;	/* requests not yet read by userspace */
	struct list_head recv_list;	/* requests read, awaiting a response */
	struct vdpa_callback config_cb;	/* updated under irq_lock */
	struct work_struct inject;	/* flushed on reset; handler not in view */
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;	/* write-held across vduse_dev_reset() */
	int minor;
	bool broken;		/* set once by vduse_dev_broken() */
	bool connected;
	u64 api_version;	/* compared against VDUSE_API_VERSION_1 */
	u64 device_features;
	u64 driver_features;	/* set by the driver, cleared on reset */
	u32 device_id;
	u32 vendor_id;
	u32 generation;		/* incremented on every reset */
	u32 config_size;	/* size in bytes of the config buffer */
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	u32 ngroups;
	u32 nas;
	struct vduse_vq_group *groups;	/* indexed by virtqueue group */
	unsigned int bounce_size;
	struct mutex domain_lock;
};
/* One in-flight request to userspace together with its response slot. */
struct vduse_dev_msg {
	struct vduse_dev_request req;	/* copied to userspace on read */
	struct vduse_dev_response resp;	/* filled from userspace on write */
	struct list_head list;		/* link in dev->send_list or recv_list */
	wait_queue_head_t waitq;	/* the sender sleeps here until completed */
	bool completed;			/* set on response or when the device breaks */
};
/*
 * NOTE(review): presumably per-open state of the control device; its users
 * are not in view - confirm.
 */
struct vduse_control {
	u64 api_version;
};
/*
 * File-scope driver state. None of these are referenced in the part of the
 * file visible here, so their exact roles should be confirmed at the use
 * sites.
 */
static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);
static dev_t vduse_major;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
static struct workqueue_struct *vduse_irq_bound_wq;
/*
 * Virtio device IDs listed for VDUSE.
 * NOTE(review): presumably an allow-list checked at device creation - the
 * check itself is not in view.
 */
static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
	VIRTIO_ID_NET,
	VIRTIO_ID_FS,
};
  158. static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
  159. {
  160. struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
  161. return vdev->dev;
  162. }
  163. static inline struct vduse_dev *dev_to_vduse(struct device *dev)
  164. {
  165. struct vdpa_device *vdpa = dev_to_vdpa(dev);
  166. return vdpa_to_vduse(vdpa);
  167. }
  168. static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
  169. uint32_t request_id)
  170. {
  171. struct vduse_dev_msg *msg;
  172. list_for_each_entry(msg, head, list) {
  173. if (msg->req.request_id == request_id) {
  174. list_del(&msg->list);
  175. return msg;
  176. }
  177. }
  178. return NULL;
  179. }
  180. static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
  181. {
  182. struct vduse_dev_msg *msg = NULL;
  183. if (!list_empty(head)) {
  184. msg = list_first_entry(head, struct vduse_dev_msg, list);
  185. list_del(&msg->list);
  186. }
  187. return msg;
  188. }
/* Append @msg to the tail of @head, keeping the list in FIFO order. */
static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}
  194. static void vduse_dev_broken(struct vduse_dev *dev)
  195. {
  196. struct vduse_dev_msg *msg, *tmp;
  197. if (unlikely(dev->broken))
  198. return;
  199. list_splice_init(&dev->recv_list, &dev->send_list);
  200. list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
  201. list_del(&msg->list);
  202. msg->completed = 1;
  203. msg->resp.result = VDUSE_REQ_RESULT_FAILED;
  204. wake_up(&msg->waitq);
  205. }
  206. dev->broken = true;
  207. wake_up(&dev->waitq);
  208. }
/*
 * Send @msg to userspace and wait for the response.
 *
 * The request gets a fresh request_id, is queued on dev->send_list and readers
 * of dev->waitq are woken. The caller then sleeps (killable) until the message
 * is completed, bounded by dev->msg_timeout * HZ jiffies when msg_timeout is
 * non-zero.
 *
 * Returns 0 when the response result is VDUSE_REQ_RESULT_OK, -EIO otherwise
 * (including when the device is, or becomes, broken).
 */
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	/* Cheap unlocked check; repeated under msg_lock below. */
	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		/* Nobody answered: take the message back off whichever list holds it. */
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunction when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);
	return ret;
}
  242. static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
  243. struct vduse_virtqueue *vq,
  244. struct vdpa_vq_state_packed *packed)
  245. {
  246. struct vduse_dev_msg msg = { 0 };
  247. int ret;
  248. msg.req.type = VDUSE_GET_VQ_STATE;
  249. msg.req.vq_state.index = vq->index;
  250. ret = vduse_dev_msg_sync(dev, &msg);
  251. if (ret)
  252. return ret;
  253. packed->last_avail_counter =
  254. msg.resp.vq_state.packed.last_avail_counter & 0x0001;
  255. packed->last_avail_idx =
  256. msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
  257. packed->last_used_counter =
  258. msg.resp.vq_state.packed.last_used_counter & 0x0001;
  259. packed->last_used_idx =
  260. msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
  261. return 0;
  262. }
  263. static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
  264. struct vduse_virtqueue *vq,
  265. struct vdpa_vq_state_split *split)
  266. {
  267. struct vduse_dev_msg msg = { 0 };
  268. int ret;
  269. msg.req.type = VDUSE_GET_VQ_STATE;
  270. msg.req.vq_state.index = vq->index;
  271. ret = vduse_dev_msg_sync(dev, &msg);
  272. if (ret)
  273. return ret;
  274. split->avail_index = msg.resp.vq_state.split.avail_index;
  275. return 0;
  276. }
  277. static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
  278. {
  279. struct vduse_dev_msg msg = { 0 };
  280. msg.req.type = VDUSE_SET_STATUS;
  281. msg.req.s.status = status;
  282. return vduse_dev_msg_sync(dev, &msg);
  283. }
  284. static int vduse_dev_update_iotlb(struct vduse_dev *dev, u32 asid,
  285. u64 start, u64 last)
  286. {
  287. struct vduse_dev_msg msg = { 0 };
  288. if (last < start)
  289. return -EINVAL;
  290. msg.req.type = VDUSE_UPDATE_IOTLB;
  291. if (dev->api_version < VDUSE_API_VERSION_1) {
  292. msg.req.iova.start = start;
  293. msg.req.iova.last = last;
  294. } else {
  295. msg.req.iova_v2.start = start;
  296. msg.req.iova_v2.last = last;
  297. msg.req.iova_v2.asid = asid;
  298. }
  299. return vduse_dev_msg_sync(dev, &msg);
  300. }
/*
 * read() handler of the device file: hand the next pending request to
 * userspace.
 *
 * The destination must have room for a whole struct vduse_dev_request,
 * otherwise -EINVAL is returned. When send_list is empty the call returns
 * -EAGAIN for O_NONBLOCK files and sleeps interruptibly otherwise. A
 * successfully copied request moves to recv_list to await its response; if the
 * copy comes up short the request goes back on send_list and -EFAULT is
 * returned.
 */
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		/* Drop the spinlock before sleeping; re-check the list after. */
		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	/*
	 * The copy runs without msg_lock held.
	 * NOTE(review): during this window msg sits on neither list while
	 * vduse_dev_msg_sync() may still time out and list_del() it - confirm
	 * this cannot race.
	 */
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);
	return ret;
}
  338. static bool is_mem_zero(const char *ptr, int size)
  339. {
  340. int i;
  341. for (i = 0; i < size; i++) {
  342. if (ptr[i])
  343. return false;
  344. }
  345. return true;
  346. }
  347. static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
  348. {
  349. struct file *file = iocb->ki_filp;
  350. struct vduse_dev *dev = file->private_data;
  351. struct vduse_dev_response resp;
  352. struct vduse_dev_msg *msg;
  353. size_t ret;
  354. ret = copy_from_iter(&resp, sizeof(resp), from);
  355. if (ret != sizeof(resp))
  356. return -EINVAL;
  357. if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
  358. return -EINVAL;
  359. spin_lock(&dev->msg_lock);
  360. msg = vduse_find_msg(&dev->recv_list, resp.request_id);
  361. if (!msg) {
  362. ret = -ENOENT;
  363. goto unlock;
  364. }
  365. memcpy(&msg->resp, &resp, sizeof(resp));
  366. msg->completed = 1;
  367. wake_up(&msg->waitq);
  368. unlock:
  369. spin_unlock(&dev->msg_lock);
  370. return ret;
  371. }
  372. static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
  373. {
  374. struct vduse_dev *dev = file->private_data;
  375. __poll_t mask = 0;
  376. poll_wait(file, &dev->waitq, wait);
  377. spin_lock(&dev->msg_lock);
  378. if (unlikely(dev->broken))
  379. mask |= EPOLLERR;
  380. if (!list_empty(&dev->send_list))
  381. mask |= EPOLLIN | EPOLLRDNORM;
  382. if (!list_empty(&dev->recv_list))
  383. mask |= EPOLLOUT | EPOLLWRNORM;
  384. spin_unlock(&dev->msg_lock);
  385. return mask;
  386. }
/*
 * Return the device and all its virtqueues to their initial state.
 *
 * Bounce maps of every address space are reset first; then, with dev->rwsem
 * held for writing, status and driver features are cleared, the generation is
 * incremented, callbacks and kick eventfds are dropped and outstanding work
 * items are flushed.
 */
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	for (i = 0; i < dev->nas; i++) {
		struct vduse_iova_domain *domain = dev->as[i].domain;

		if (domain && domain->bounce_map)
			vduse_domain_reset_bounce_map(domain);
	}

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	/* Flushed after the callback is cleared and irq_lock is released. */
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		/* Drop our reference on the kick eventfd, if one is installed. */
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		vq->cb.trigger = NULL;
		spin_unlock(&vq->irq_lock);
		/* Flushed only after both per-vq locks have been released. */
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}
  429. static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
  430. u64 desc_area, u64 driver_area,
  431. u64 device_area)
  432. {
  433. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  434. struct vduse_virtqueue *vq = dev->vqs[idx];
  435. vq->desc_addr = desc_area;
  436. vq->driver_addr = driver_area;
  437. vq->device_addr = device_area;
  438. return 0;
  439. }
  440. static void vduse_vq_kick(struct vduse_virtqueue *vq)
  441. {
  442. spin_lock(&vq->kick_lock);
  443. if (!vq->ready)
  444. goto unlock;
  445. if (vq->kickfd)
  446. eventfd_signal(vq->kickfd);
  447. else
  448. vq->kicked = true;
  449. unlock:
  450. spin_unlock(&vq->kick_lock);
  451. }
  452. static void vduse_vq_kick_work(struct work_struct *work)
  453. {
  454. struct vduse_virtqueue *vq = container_of(work,
  455. struct vduse_virtqueue, kick);
  456. vduse_vq_kick(vq);
  457. }
  458. static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
  459. {
  460. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  461. struct vduse_virtqueue *vq = dev->vqs[idx];
  462. if (!eventfd_signal_allowed()) {
  463. schedule_work(&vq->kick);
  464. return;
  465. }
  466. vduse_vq_kick(vq);
  467. }
  468. static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
  469. struct vdpa_callback *cb)
  470. {
  471. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  472. struct vduse_virtqueue *vq = dev->vqs[idx];
  473. spin_lock(&vq->irq_lock);
  474. vq->cb.callback = cb->callback;
  475. vq->cb.private = cb->private;
  476. vq->cb.trigger = cb->trigger;
  477. spin_unlock(&vq->irq_lock);
  478. }
  479. static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
  480. {
  481. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  482. struct vduse_virtqueue *vq = dev->vqs[idx];
  483. vq->num = num;
  484. }
  485. static u16 vduse_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 idx)
  486. {
  487. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  488. struct vduse_virtqueue *vq = dev->vqs[idx];
  489. if (vq->num)
  490. return vq->num;
  491. else
  492. return vq->num_max;
  493. }
  494. static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
  495. u16 idx, bool ready)
  496. {
  497. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  498. struct vduse_virtqueue *vq = dev->vqs[idx];
  499. vq->ready = ready;
  500. }
  501. static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
  502. {
  503. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  504. struct vduse_virtqueue *vq = dev->vqs[idx];
  505. return vq->ready;
  506. }
  507. static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
  508. const struct vdpa_vq_state *state)
  509. {
  510. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  511. struct vduse_virtqueue *vq = dev->vqs[idx];
  512. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
  513. vq->state.packed.last_avail_counter =
  514. state->packed.last_avail_counter;
  515. vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
  516. vq->state.packed.last_used_counter =
  517. state->packed.last_used_counter;
  518. vq->state.packed.last_used_idx = state->packed.last_used_idx;
  519. } else
  520. vq->state.split.avail_index = state->split.avail_index;
  521. return 0;
  522. }
  523. static u32 vduse_get_vq_group(struct vdpa_device *vdpa, u16 idx)
  524. {
  525. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  526. if (dev->api_version < VDUSE_API_VERSION_1)
  527. return 0;
  528. return dev->vqs[idx]->group;
  529. }
  530. static union virtio_map vduse_get_vq_map(struct vdpa_device *vdpa, u16 idx)
  531. {
  532. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  533. u32 vq_group = vduse_get_vq_group(vdpa, idx);
  534. union virtio_map ret = {
  535. .group = &dev->groups[vq_group],
  536. };
  537. return ret;
  538. }
/*
 * Scoped read lock on a group's as_lock. The lock is only taken (and
 * released) when the device has more than one address space; with a single
 * address space the guard does nothing.
 */
DEFINE_GUARD(vq_group_as_read_lock, struct vduse_vq_group *,
	if (_T->dev->nas > 1)
		read_lock(&_T->as_lock),
	if (_T->dev->nas > 1)
		read_unlock(&_T->as_lock))
/*
 * Scoped write lock on a group's as_lock. The lock is only taken (and
 * released) when the device has more than one address space; with a single
 * address space the guard does nothing.
 */
DEFINE_GUARD(vq_group_as_write_lock, struct vduse_vq_group *,
	if (_T->dev->nas > 1)
		write_lock(&_T->as_lock),
	if (_T->dev->nas > 1)
		write_unlock(&_T->as_lock))
  549. static int vduse_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
  550. unsigned int asid)
  551. {
  552. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  553. struct vduse_dev_msg msg = { 0 };
  554. int r;
  555. if (dev->api_version < VDUSE_API_VERSION_1)
  556. return -EINVAL;
  557. msg.req.type = VDUSE_SET_VQ_GROUP_ASID;
  558. msg.req.vq_group_asid.group = group;
  559. msg.req.vq_group_asid.asid = asid;
  560. r = vduse_dev_msg_sync(dev, &msg);
  561. if (r < 0)
  562. return r;
  563. guard(vq_group_as_write_lock)(&dev->groups[group]);
  564. dev->groups[group].as = &dev->as[asid];
  565. return 0;
  566. }
  567. static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
  568. struct vdpa_vq_state *state)
  569. {
  570. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  571. struct vduse_virtqueue *vq = dev->vqs[idx];
  572. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
  573. return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
  574. return vduse_dev_get_vq_state_split(dev, vq, &state->split);
  575. }
  576. static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
  577. {
  578. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  579. return dev->vq_align;
  580. }
  581. static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
  582. {
  583. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  584. return dev->device_features;
  585. }
  586. static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
  587. {
  588. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  589. dev->driver_features = features;
  590. return 0;
  591. }
  592. static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
  593. {
  594. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  595. return dev->driver_features;
  596. }
  597. static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
  598. struct vdpa_callback *cb)
  599. {
  600. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  601. spin_lock(&dev->irq_lock);
  602. dev->config_cb.callback = cb->callback;
  603. dev->config_cb.private = cb->private;
  604. spin_unlock(&dev->irq_lock);
  605. }
  606. static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
  607. {
  608. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  609. u16 num_max = 0;
  610. int i;
  611. for (i = 0; i < dev->vq_num; i++)
  612. if (num_max < dev->vqs[i]->num_max)
  613. num_max = dev->vqs[i]->num_max;
  614. return num_max;
  615. }
  616. static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
  617. {
  618. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  619. return dev->device_id;
  620. }
  621. static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
  622. {
  623. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  624. return dev->vendor_id;
  625. }
  626. static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
  627. {
  628. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  629. return dev->status;
  630. }
  631. static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
  632. {
  633. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  634. if (vduse_dev_set_status(dev, status))
  635. return;
  636. dev->status = status;
  637. }
  638. static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
  639. {
  640. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  641. return dev->config_size;
  642. }
  643. static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
  644. void *buf, unsigned int len)
  645. {
  646. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  647. /* Initialize the buffer in case of partial copy. */
  648. memset(buf, 0, len);
  649. if (offset > dev->config_size)
  650. return;
  651. if (len > dev->config_size - offset)
  652. len = dev->config_size - offset;
  653. memcpy(buf, dev->config + offset, len);
  654. }
/*
 * vDPA op: write to the configuration space. Intentionally a no-op, so
 * writes from the driver are silently ignored.
 */
static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}
  660. static int vduse_vdpa_reset(struct vdpa_device *vdpa)
  661. {
  662. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  663. int ret = vduse_dev_set_status(dev, 0);
  664. vduse_dev_reset(dev);
  665. return ret;
  666. }
  667. static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
  668. {
  669. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  670. return dev->generation;
  671. }
  672. static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
  673. const struct cpumask *cpu_mask)
  674. {
  675. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  676. if (cpu_mask)
  677. cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
  678. else
  679. cpumask_setall(&dev->vqs[idx]->irq_affinity);
  680. return 0;
  681. }
  682. static const struct cpumask *
  683. vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
  684. {
  685. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  686. return &dev->vqs[idx]->irq_affinity;
  687. }
  688. static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
  689. unsigned int asid,
  690. struct vhost_iotlb *iotlb)
  691. {
  692. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  693. int ret;
  694. ret = vduse_domain_set_map(dev->as[asid].domain, iotlb);
  695. if (ret)
  696. return ret;
  697. ret = vduse_dev_update_iotlb(dev, asid, 0ULL, ULLONG_MAX);
  698. if (ret) {
  699. vduse_domain_clear_map(dev->as[asid].domain, iotlb);
  700. return ret;
  701. }
  702. return 0;
  703. }
  704. static void vduse_vdpa_free(struct vdpa_device *vdpa)
  705. {
  706. struct vduse_dev *dev = vdpa_to_vduse(vdpa);
  707. dev->vdev = NULL;
  708. }
/*
 * vDPA config operation table: maps each vdpa_config_ops hook to its
 * VDUSE implementation.
 */
static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	/* Virtqueue setup and state */
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.get_vq_size = vduse_vdpa_get_vq_size,
	.get_vq_group = vduse_get_vq_group,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	/* Feature negotiation */
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	/* Device identity, status and configuration space */
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	/* Interrupt affinity */
	.set_vq_affinity = vduse_vdpa_set_vq_affinity,
	.get_vq_affinity = vduse_vdpa_get_vq_affinity,
	/* Reset, mappings and teardown */
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.set_group_asid = vduse_set_group_asid,
	.get_vq_map = vduse_get_vq_map,
	.free = vduse_vdpa_free,
};
  742. static void vduse_dev_sync_single_for_device(union virtio_map token,
  743. dma_addr_t dma_addr, size_t size,
  744. enum dma_data_direction dir)
  745. {
  746. struct vduse_iova_domain *domain;
  747. if (!token.group)
  748. return;
  749. guard(vq_group_as_read_lock)(token.group);
  750. domain = token.group->as->domain;
  751. vduse_domain_sync_single_for_device(domain, dma_addr, size, dir);
  752. }
  753. static void vduse_dev_sync_single_for_cpu(union virtio_map token,
  754. dma_addr_t dma_addr, size_t size,
  755. enum dma_data_direction dir)
  756. {
  757. struct vduse_iova_domain *domain;
  758. if (!token.group)
  759. return;
  760. guard(vq_group_as_read_lock)(token.group);
  761. domain = token.group->as->domain;
  762. vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir);
  763. }
  764. static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page,
  765. unsigned long offset, size_t size,
  766. enum dma_data_direction dir,
  767. unsigned long attrs)
  768. {
  769. struct vduse_iova_domain *domain;
  770. if (!token.group)
  771. return DMA_MAPPING_ERROR;
  772. guard(vq_group_as_read_lock)(token.group);
  773. domain = token.group->as->domain;
  774. return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
  775. }
  776. static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr,
  777. size_t size, enum dma_data_direction dir,
  778. unsigned long attrs)
  779. {
  780. struct vduse_iova_domain *domain;
  781. if (!token.group)
  782. return;
  783. guard(vq_group_as_read_lock)(token.group);
  784. domain = token.group->as->domain;
  785. vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
  786. }
  787. static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size,
  788. dma_addr_t *dma_addr, gfp_t flag)
  789. {
  790. void *addr;
  791. *dma_addr = DMA_MAPPING_ERROR;
  792. if (!token.group)
  793. return NULL;
  794. addr = alloc_pages_exact(size, flag);
  795. if (!addr)
  796. return NULL;
  797. {
  798. struct vduse_iova_domain *domain;
  799. guard(vq_group_as_read_lock)(token.group);
  800. domain = token.group->as->domain;
  801. *dma_addr = vduse_domain_alloc_coherent(domain, size, addr);
  802. if (*dma_addr == DMA_MAPPING_ERROR)
  803. goto err;
  804. }
  805. return addr;
  806. err:
  807. free_pages_exact(addr, size);
  808. return NULL;
  809. }
  810. static void vduse_dev_free_coherent(union virtio_map token, size_t size,
  811. void *vaddr, dma_addr_t dma_addr,
  812. unsigned long attrs)
  813. {
  814. if (!token.group)
  815. return;
  816. {
  817. struct vduse_iova_domain *domain;
  818. guard(vq_group_as_read_lock)(token.group);
  819. domain = token.group->as->domain;
  820. vduse_domain_free_coherent(domain, size, dma_addr, attrs);
  821. }
  822. free_pages_exact(vaddr, size);
  823. }
  824. static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr)
  825. {
  826. if (!token.group)
  827. return false;
  828. guard(vq_group_as_read_lock)(token.group);
  829. return dma_addr < token.group->as->domain->bounce_size;
  830. }
  831. static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr)
  832. {
  833. if (unlikely(dma_addr == DMA_MAPPING_ERROR))
  834. return -ENOMEM;
  835. return 0;
  836. }
  837. static size_t vduse_dev_max_mapping_size(union virtio_map token)
  838. {
  839. if (!token.group)
  840. return 0;
  841. guard(vq_group_as_read_lock)(token.group);
  842. return token.group->as->domain->bounce_size;
  843. }
/*
 * virtio mapping operation table: maps each virtio_map_ops hook to the
 * corresponding vduse_dev_* helper.
 */
static const struct virtio_map_ops vduse_map_ops = {
	.sync_single_for_device = vduse_dev_sync_single_for_device,
	.sync_single_for_cpu = vduse_dev_sync_single_for_cpu,
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.need_sync = vduse_dev_need_sync,
	.mapping_error = vduse_dev_mapping_error,
	.max_mapping_size = vduse_dev_max_mapping_size,
};
  855. static unsigned int perm_to_file_flags(u8 perm)
  856. {
  857. unsigned int flags = 0;
  858. switch (perm) {
  859. case VDUSE_ACCESS_WO:
  860. flags |= O_WRONLY;
  861. break;
  862. case VDUSE_ACCESS_RO:
  863. flags |= O_RDONLY;
  864. break;
  865. case VDUSE_ACCESS_RW:
  866. flags |= O_RDWR;
  867. break;
  868. default:
  869. WARN(1, "invalidate vhost IOTLB permission\n");
  870. break;
  871. }
  872. return flags;
  873. }
  874. static int vduse_kickfd_setup(struct vduse_dev *dev,
  875. struct vduse_vq_eventfd *eventfd)
  876. {
  877. struct eventfd_ctx *ctx = NULL;
  878. struct vduse_virtqueue *vq;
  879. u32 index;
  880. if (eventfd->index >= dev->vq_num)
  881. return -EINVAL;
  882. index = array_index_nospec(eventfd->index, dev->vq_num);
  883. vq = dev->vqs[index];
  884. if (eventfd->fd >= 0) {
  885. ctx = eventfd_ctx_fdget(eventfd->fd);
  886. if (IS_ERR(ctx))
  887. return PTR_ERR(ctx);
  888. } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
  889. return 0;
  890. spin_lock(&vq->kick_lock);
  891. if (vq->kickfd)
  892. eventfd_ctx_put(vq->kickfd);
  893. vq->kickfd = ctx;
  894. if (vq->ready && vq->kicked && vq->kickfd) {
  895. eventfd_signal(vq->kickfd);
  896. vq->kicked = false;
  897. }
  898. spin_unlock(&vq->kick_lock);
  899. return 0;
  900. }
  901. static bool vduse_dev_is_ready(struct vduse_dev *dev)
  902. {
  903. int i;
  904. for (i = 0; i < dev->vq_num; i++)
  905. if (!dev->vqs[i]->num_max)
  906. return false;
  907. return true;
  908. }
  909. static void vduse_dev_irq_inject(struct work_struct *work)
  910. {
  911. struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
  912. spin_lock_bh(&dev->irq_lock);
  913. if (dev->config_cb.callback)
  914. dev->config_cb.callback(dev->config_cb.private);
  915. spin_unlock_bh(&dev->irq_lock);
  916. }
  917. static void vduse_vq_irq_inject(struct work_struct *work)
  918. {
  919. struct vduse_virtqueue *vq = container_of(work,
  920. struct vduse_virtqueue, inject);
  921. spin_lock_bh(&vq->irq_lock);
  922. if (vq->ready && vq->cb.callback)
  923. vq->cb.callback(vq->cb.private);
  924. spin_unlock_bh(&vq->irq_lock);
  925. }
  926. static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
  927. {
  928. bool signal = false;
  929. if (!vq->cb.trigger)
  930. return false;
  931. spin_lock_irq(&vq->irq_lock);
  932. if (vq->ready && vq->cb.trigger) {
  933. eventfd_signal(vq->cb.trigger);
  934. signal = true;
  935. }
  936. spin_unlock_irq(&vq->irq_lock);
  937. return signal;
  938. }
  939. static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
  940. struct work_struct *irq_work,
  941. int irq_effective_cpu)
  942. {
  943. int ret = -EINVAL;
  944. down_read(&dev->rwsem);
  945. if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
  946. goto unlock;
  947. ret = 0;
  948. if (irq_effective_cpu == IRQ_UNBOUND)
  949. queue_work(vduse_irq_wq, irq_work);
  950. else
  951. queue_work_on(irq_effective_cpu,
  952. vduse_irq_bound_wq, irq_work);
  953. unlock:
  954. up_read(&dev->rwsem);
  955. return ret;
  956. }
  957. static int vduse_dev_dereg_umem(struct vduse_dev *dev, u32 asid,
  958. u64 iova, u64 size)
  959. {
  960. int ret;
  961. mutex_lock(&dev->as[asid].mem_lock);
  962. ret = -ENOENT;
  963. if (!dev->as[asid].umem)
  964. goto unlock;
  965. ret = -EINVAL;
  966. if (!dev->as[asid].domain)
  967. goto unlock;
  968. if (dev->as[asid].umem->iova != iova ||
  969. size != dev->as[asid].domain->bounce_size)
  970. goto unlock;
  971. vduse_domain_remove_user_bounce_pages(dev->as[asid].domain);
  972. unpin_user_pages_dirty_lock(dev->as[asid].umem->pages,
  973. dev->as[asid].umem->npages, true);
  974. atomic64_sub(dev->as[asid].umem->npages, &dev->as[asid].umem->mm->pinned_vm);
  975. mmdrop(dev->as[asid].umem->mm);
  976. vfree(dev->as[asid].umem->pages);
  977. kfree(dev->as[asid].umem);
  978. dev->as[asid].umem = NULL;
  979. ret = 0;
  980. unlock:
  981. mutex_unlock(&dev->as[asid].mem_lock);
  982. return ret;
  983. }
  984. static int vduse_dev_reg_umem(struct vduse_dev *dev,
  985. u32 asid, u64 iova, u64 uaddr, u64 size)
  986. {
  987. struct page **page_list = NULL;
  988. struct vduse_umem *umem = NULL;
  989. long pinned = 0;
  990. unsigned long npages, lock_limit;
  991. int ret;
  992. if (!dev->as[asid].domain || !dev->as[asid].domain->bounce_map ||
  993. size != dev->as[asid].domain->bounce_size ||
  994. iova != 0 || uaddr & ~PAGE_MASK)
  995. return -EINVAL;
  996. mutex_lock(&dev->as[asid].mem_lock);
  997. ret = -EEXIST;
  998. if (dev->as[asid].umem)
  999. goto unlock;
  1000. ret = -ENOMEM;
  1001. npages = size >> PAGE_SHIFT;
  1002. page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
  1003. GFP_KERNEL_ACCOUNT);
  1004. umem = kzalloc_obj(*umem);
  1005. if (!page_list || !umem)
  1006. goto unlock;
  1007. mmap_read_lock(current->mm);
  1008. lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
  1009. if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
  1010. goto out;
  1011. pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
  1012. page_list);
  1013. if (pinned != npages) {
  1014. ret = pinned < 0 ? pinned : -ENOMEM;
  1015. goto out;
  1016. }
  1017. ret = vduse_domain_add_user_bounce_pages(dev->as[asid].domain,
  1018. page_list, pinned);
  1019. if (ret)
  1020. goto out;
  1021. atomic64_add(npages, &current->mm->pinned_vm);
  1022. umem->pages = page_list;
  1023. umem->npages = pinned;
  1024. umem->iova = iova;
  1025. umem->mm = current->mm;
  1026. mmgrab(current->mm);
  1027. dev->as[asid].umem = umem;
  1028. out:
  1029. if (ret && pinned > 0)
  1030. unpin_user_pages(page_list, pinned);
  1031. mmap_read_unlock(current->mm);
  1032. unlock:
  1033. if (ret) {
  1034. vfree(page_list);
  1035. kfree(umem);
  1036. }
  1037. mutex_unlock(&dev->as[asid].mem_lock);
  1038. return ret;
  1039. }
  1040. static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
  1041. {
  1042. int curr_cpu = vq->irq_effective_cpu;
  1043. while (true) {
  1044. curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
  1045. if (cpu_online(curr_cpu))
  1046. break;
  1047. if (curr_cpu >= nr_cpu_ids)
  1048. curr_cpu = IRQ_UNBOUND;
  1049. }
  1050. vq->irq_effective_cpu = curr_cpu;
  1051. }
  1052. static int vduse_dev_iotlb_entry(struct vduse_dev *dev,
  1053. struct vduse_iotlb_entry_v2 *entry,
  1054. struct file **f, uint64_t *capability)
  1055. {
  1056. u32 asid;
  1057. int r = -EINVAL;
  1058. struct vhost_iotlb_map *map;
  1059. if (entry->start > entry->last || entry->asid >= dev->nas)
  1060. return -EINVAL;
  1061. asid = array_index_nospec(entry->asid, dev->nas);
  1062. mutex_lock(&dev->domain_lock);
  1063. if (!dev->as[asid].domain)
  1064. goto out;
  1065. spin_lock(&dev->as[asid].domain->iotlb_lock);
  1066. map = vhost_iotlb_itree_first(dev->as[asid].domain->iotlb,
  1067. entry->start, entry->last);
  1068. if (map) {
  1069. if (f) {
  1070. const struct vdpa_map_file *map_file;
  1071. map_file = (struct vdpa_map_file *)map->opaque;
  1072. entry->offset = map_file->offset;
  1073. *f = get_file(map_file->file);
  1074. }
  1075. entry->start = map->start;
  1076. entry->last = map->last;
  1077. entry->perm = map->perm;
  1078. if (capability) {
  1079. *capability = 0;
  1080. if (dev->as[asid].domain->bounce_map && map->start == 0 &&
  1081. map->last == dev->as[asid].domain->bounce_size - 1)
  1082. *capability |= VDUSE_IOVA_CAP_UMEM;
  1083. }
  1084. r = 0;
  1085. }
  1086. spin_unlock(&dev->as[asid].domain->iotlb_lock);
  1087. out:
  1088. mutex_unlock(&dev->domain_lock);
  1089. return r;
  1090. }
  1091. static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
  1092. unsigned long arg)
  1093. {
  1094. struct vduse_dev *dev = file->private_data;
  1095. void __user *argp = (void __user *)arg;
  1096. int ret;
  1097. if (unlikely(dev->broken))
  1098. return -EPERM;
  1099. switch (cmd) {
  1100. case VDUSE_IOTLB_GET_FD:
  1101. case VDUSE_IOTLB_GET_FD2: {
  1102. struct vduse_iotlb_entry_v2 entry = {0};
  1103. struct file *f = NULL;
  1104. ret = -ENOIOCTLCMD;
  1105. if (dev->api_version < VDUSE_API_VERSION_1 &&
  1106. cmd == VDUSE_IOTLB_GET_FD2)
  1107. break;
  1108. ret = -EFAULT;
  1109. if (copy_from_user(&entry, argp, _IOC_SIZE(cmd)))
  1110. break;
  1111. ret = -EINVAL;
  1112. if (!is_mem_zero((const char *)entry.reserved,
  1113. sizeof(entry.reserved)))
  1114. break;
  1115. ret = vduse_dev_iotlb_entry(dev, &entry, &f, NULL);
  1116. if (ret)
  1117. break;
  1118. ret = -EINVAL;
  1119. if (!f)
  1120. break;
  1121. ret = copy_to_user(argp, &entry, _IOC_SIZE(cmd));
  1122. if (ret) {
  1123. ret = -EFAULT;
  1124. fput(f);
  1125. break;
  1126. }
  1127. ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm));
  1128. fput(f);
  1129. break;
  1130. }
  1131. case VDUSE_DEV_GET_FEATURES:
  1132. /*
  1133. * Just mirror what driver wrote here.
  1134. * The driver is expected to check FEATURE_OK later.
  1135. */
  1136. ret = put_user(dev->driver_features, (u64 __user *)argp);
  1137. break;
  1138. case VDUSE_DEV_SET_CONFIG: {
  1139. struct vduse_config_data config;
  1140. unsigned long size = offsetof(struct vduse_config_data,
  1141. buffer);
  1142. ret = -EFAULT;
  1143. if (copy_from_user(&config, argp, size))
  1144. break;
  1145. ret = -EINVAL;
  1146. if (config.offset > dev->config_size ||
  1147. config.length == 0 ||
  1148. config.length > dev->config_size - config.offset)
  1149. break;
  1150. ret = -EFAULT;
  1151. if (copy_from_user(dev->config + config.offset, argp + size,
  1152. config.length))
  1153. break;
  1154. ret = 0;
  1155. break;
  1156. }
  1157. case VDUSE_DEV_INJECT_CONFIG_IRQ:
  1158. ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
  1159. break;
  1160. case VDUSE_VQ_SETUP: {
  1161. struct vduse_vq_config config;
  1162. u32 index;
  1163. ret = -EFAULT;
  1164. if (copy_from_user(&config, argp, sizeof(config)))
  1165. break;
  1166. ret = -EINVAL;
  1167. if (config.index >= dev->vq_num)
  1168. break;
  1169. if (dev->api_version < VDUSE_API_VERSION_1) {
  1170. if (config.group)
  1171. break;
  1172. } else {
  1173. if (config.group >= dev->ngroups)
  1174. break;
  1175. if (dev->status & VIRTIO_CONFIG_S_DRIVER_OK)
  1176. break;
  1177. }
  1178. if (config.reserved1 ||
  1179. !is_mem_zero((const char *)config.reserved2,
  1180. sizeof(config.reserved2)))
  1181. break;
  1182. index = array_index_nospec(config.index, dev->vq_num);
  1183. dev->vqs[index]->num_max = config.max_size;
  1184. dev->vqs[index]->group = config.group;
  1185. ret = 0;
  1186. break;
  1187. }
  1188. case VDUSE_VQ_GET_INFO: {
  1189. struct vduse_vq_info vq_info;
  1190. struct vduse_virtqueue *vq;
  1191. u32 index;
  1192. ret = -EFAULT;
  1193. if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
  1194. break;
  1195. ret = -EINVAL;
  1196. if (vq_info.index >= dev->vq_num)
  1197. break;
  1198. index = array_index_nospec(vq_info.index, dev->vq_num);
  1199. vq = dev->vqs[index];
  1200. vq_info.desc_addr = vq->desc_addr;
  1201. vq_info.driver_addr = vq->driver_addr;
  1202. vq_info.device_addr = vq->device_addr;
  1203. vq_info.num = vq->num;
  1204. if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
  1205. vq_info.packed.last_avail_counter =
  1206. vq->state.packed.last_avail_counter;
  1207. vq_info.packed.last_avail_idx =
  1208. vq->state.packed.last_avail_idx;
  1209. vq_info.packed.last_used_counter =
  1210. vq->state.packed.last_used_counter;
  1211. vq_info.packed.last_used_idx =
  1212. vq->state.packed.last_used_idx;
  1213. } else
  1214. vq_info.split.avail_index =
  1215. vq->state.split.avail_index;
  1216. vq_info.ready = vq->ready;
  1217. ret = -EFAULT;
  1218. if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
  1219. break;
  1220. ret = 0;
  1221. break;
  1222. }
  1223. case VDUSE_VQ_SETUP_KICKFD: {
  1224. struct vduse_vq_eventfd eventfd;
  1225. ret = -EFAULT;
  1226. if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
  1227. break;
  1228. ret = vduse_kickfd_setup(dev, &eventfd);
  1229. break;
  1230. }
  1231. case VDUSE_VQ_INJECT_IRQ: {
  1232. u32 index;
  1233. ret = -EFAULT;
  1234. if (get_user(index, (u32 __user *)argp))
  1235. break;
  1236. ret = -EINVAL;
  1237. if (index >= dev->vq_num)
  1238. break;
  1239. ret = 0;
  1240. index = array_index_nospec(index, dev->vq_num);
  1241. if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
  1242. vduse_vq_update_effective_cpu(dev->vqs[index]);
  1243. ret = vduse_dev_queue_irq_work(dev,
  1244. &dev->vqs[index]->inject,
  1245. dev->vqs[index]->irq_effective_cpu);
  1246. }
  1247. break;
  1248. }
  1249. case VDUSE_IOTLB_REG_UMEM: {
  1250. struct vduse_iova_umem umem;
  1251. u32 asid;
  1252. ret = -EFAULT;
  1253. if (copy_from_user(&umem, argp, sizeof(umem)))
  1254. break;
  1255. ret = -EINVAL;
  1256. if (!is_mem_zero((const char *)umem.reserved,
  1257. sizeof(umem.reserved)) ||
  1258. (dev->api_version < VDUSE_API_VERSION_1 &&
  1259. umem.asid != 0) || umem.asid >= dev->nas)
  1260. break;
  1261. mutex_lock(&dev->domain_lock);
  1262. asid = array_index_nospec(umem.asid, dev->nas);
  1263. ret = vduse_dev_reg_umem(dev, asid, umem.iova,
  1264. umem.uaddr, umem.size);
  1265. mutex_unlock(&dev->domain_lock);
  1266. break;
  1267. }
  1268. case VDUSE_IOTLB_DEREG_UMEM: {
  1269. struct vduse_iova_umem umem;
  1270. u32 asid;
  1271. ret = -EFAULT;
  1272. if (copy_from_user(&umem, argp, sizeof(umem)))
  1273. break;
  1274. ret = -EINVAL;
  1275. if (!is_mem_zero((const char *)umem.reserved,
  1276. sizeof(umem.reserved)) ||
  1277. (dev->api_version < VDUSE_API_VERSION_1 &&
  1278. umem.asid != 0) ||
  1279. umem.asid >= dev->nas)
  1280. break;
  1281. mutex_lock(&dev->domain_lock);
  1282. asid = array_index_nospec(umem.asid, dev->nas);
  1283. ret = vduse_dev_dereg_umem(dev, asid, umem.iova,
  1284. umem.size);
  1285. mutex_unlock(&dev->domain_lock);
  1286. break;
  1287. }
  1288. case VDUSE_IOTLB_GET_INFO: {
  1289. struct vduse_iova_info info;
  1290. struct vduse_iotlb_entry_v2 entry;
  1291. ret = -EFAULT;
  1292. if (copy_from_user(&info, argp, sizeof(info)))
  1293. break;
  1294. if (!is_mem_zero((const char *)info.reserved,
  1295. sizeof(info.reserved)))
  1296. break;
  1297. if (dev->api_version < VDUSE_API_VERSION_1) {
  1298. if (info.asid)
  1299. break;
  1300. } else if (info.asid >= dev->nas)
  1301. break;
  1302. entry.start = info.start;
  1303. entry.last = info.last;
  1304. entry.asid = info.asid;
  1305. ret = vduse_dev_iotlb_entry(dev, &entry, NULL,
  1306. &info.capability);
  1307. if (ret < 0)
  1308. break;
  1309. info.start = entry.start;
  1310. info.last = entry.last;
  1311. info.asid = entry.asid;
  1312. ret = -EFAULT;
  1313. if (copy_to_user(argp, &info, sizeof(info)))
  1314. break;
  1315. ret = 0;
  1316. break;
  1317. }
  1318. default:
  1319. ret = -ENOIOCTLCMD;
  1320. break;
  1321. }
  1322. return ret;
  1323. }
  1324. static int vduse_dev_release(struct inode *inode, struct file *file)
  1325. {
  1326. struct vduse_dev *dev = file->private_data;
  1327. mutex_lock(&dev->domain_lock);
  1328. for (int i = 0; i < dev->nas; i++)
  1329. if (dev->as[i].domain)
  1330. vduse_dev_dereg_umem(dev, i, 0,
  1331. dev->as[i].domain->bounce_size);
  1332. mutex_unlock(&dev->domain_lock);
  1333. spin_lock(&dev->msg_lock);
  1334. /* Make sure the inflight messages can processed after reconncection */
  1335. list_splice_init(&dev->recv_list, &dev->send_list);
  1336. spin_unlock(&dev->msg_lock);
  1337. dev->connected = false;
  1338. return 0;
  1339. }
  1340. static struct vduse_dev *vduse_dev_get_from_minor(int minor)
  1341. {
  1342. struct vduse_dev *dev;
  1343. mutex_lock(&vduse_lock);
  1344. dev = idr_find(&vduse_idr, minor);
  1345. mutex_unlock(&vduse_lock);
  1346. return dev;
  1347. }
  1348. static int vduse_dev_open(struct inode *inode, struct file *file)
  1349. {
  1350. int ret;
  1351. struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
  1352. if (!dev)
  1353. return -ENODEV;
  1354. ret = -EBUSY;
  1355. mutex_lock(&dev->lock);
  1356. if (dev->connected)
  1357. goto unlock;
  1358. ret = 0;
  1359. dev->connected = true;
  1360. file->private_data = dev;
  1361. unlock:
  1362. mutex_unlock(&dev->lock);
  1363. return ret;
  1364. }
/* File operations of the per-device VDUSE character device. */
static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	/* compat callers go through compat_ptr_ioctl */
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};
  1376. static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
  1377. {
  1378. return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
  1379. }
  1380. static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
  1381. const char *buf, size_t count)
  1382. {
  1383. cpumask_var_t new_value;
  1384. int ret;
  1385. if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
  1386. return -ENOMEM;
  1387. ret = cpumask_parse(buf, new_value);
  1388. if (ret)
  1389. goto free_mask;
  1390. ret = -EINVAL;
  1391. if (!cpumask_intersects(new_value, cpu_online_mask))
  1392. goto free_mask;
  1393. cpumask_copy(&vq->irq_affinity, new_value);
  1394. ret = count;
  1395. free_mask:
  1396. free_cpumask_var(new_value);
  1397. return ret;
  1398. }
/*
 * A sysfs attribute of a virtqueue kobject, with typed accessors that
 * receive the virtqueue directly. Dispatched by vq_attr_show() and
 * vq_attr_store(), which report -EIO for a missing callback.
 */
struct vq_sysfs_entry {
	struct attribute attr;
	/* Format the attribute into @buf; returns the number of bytes or an error. */
	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
	/* Parse @count bytes from @buf; returns the bytes consumed or an error. */
	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
			size_t count);
};
/* Read-write "irq_cb_affinity" attribute, backed by irq_cb_affinity_show/store. */
static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);

/* NULL-terminated attribute list of a virtqueue kobject. */
static struct attribute *vq_attrs[] = {
	&irq_cb_affinity_attr.attr,
	NULL,
};
/* Generates the vq_groups referenced by vq_type. */
ATTRIBUTE_GROUPS(vq);
  1411. static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
  1412. char *buf)
  1413. {
  1414. struct vduse_virtqueue *vq = container_of(kobj,
  1415. struct vduse_virtqueue, kobj);
  1416. struct vq_sysfs_entry *entry = container_of(attr,
  1417. struct vq_sysfs_entry, attr);
  1418. if (!entry->show)
  1419. return -EIO;
  1420. return entry->show(vq, buf);
  1421. }
  1422. static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
  1423. const char *buf, size_t count)
  1424. {
  1425. struct vduse_virtqueue *vq = container_of(kobj,
  1426. struct vduse_virtqueue, kobj);
  1427. struct vq_sysfs_entry *entry = container_of(attr,
  1428. struct vq_sysfs_entry, attr);
  1429. if (!entry->store)
  1430. return -EIO;
  1431. return entry->store(vq, buf, count);
  1432. }
/* sysfs accessors used for every attribute of a virtqueue kobject. */
static const struct sysfs_ops vq_sysfs_ops = {
	.show = vq_attr_show,
	.store = vq_attr_store,
};
  1437. static void vq_release(struct kobject *kobj)
  1438. {
  1439. struct vduse_virtqueue *vq = container_of(kobj,
  1440. struct vduse_virtqueue, kobj);
  1441. kfree(vq);
  1442. }
/*
 * kobject type of a virtqueue: ties together the release callback, the
 * sysfs accessors and the default attribute groups.
 */
static const struct kobj_type vq_type = {
	.release = vq_release,
	.sysfs_ops = &vq_sysfs_ops,
	.default_groups = vq_groups,
};
/*
 * devnode callback of the vduse class: build the node name
 * "vduse/<device name>". @mode is left untouched. Returns a newly
 * allocated string, or NULL if the allocation fails.
 */
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
/* Device class "vduse"; its device nodes are named by vduse_devnode(). */
static const struct class vduse_class = {
	.name = "vduse",
	.devnode = vduse_devnode,
};
  1456. static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
  1457. {
  1458. int i;
  1459. if (!dev->vqs)
  1460. return;
  1461. for (i = 0; i < dev->vq_num; i++)
  1462. kobject_put(&dev->vqs[i]->kobj);
  1463. kfree(dev->vqs);
  1464. }
  1465. static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
  1466. {
  1467. int ret, i;
  1468. dev->vq_align = vq_align;
  1469. dev->vq_num = vq_num;
  1470. dev->vqs = kzalloc_objs(*dev->vqs, dev->vq_num);
  1471. if (!dev->vqs)
  1472. return -ENOMEM;
  1473. for (i = 0; i < vq_num; i++) {
  1474. dev->vqs[i] = kzalloc_obj(*dev->vqs[i]);
  1475. if (!dev->vqs[i]) {
  1476. ret = -ENOMEM;
  1477. goto err;
  1478. }
  1479. dev->vqs[i]->index = i;
  1480. dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
  1481. INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
  1482. INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
  1483. spin_lock_init(&dev->vqs[i]->kick_lock);
  1484. spin_lock_init(&dev->vqs[i]->irq_lock);
  1485. cpumask_setall(&dev->vqs[i]->irq_affinity);
  1486. kobject_init(&dev->vqs[i]->kobj, &vq_type);
  1487. ret = kobject_add(&dev->vqs[i]->kobj,
  1488. &dev->dev->kobj, "vq%d", i);
  1489. if (ret) {
  1490. kfree(dev->vqs[i]);
  1491. goto err;
  1492. }
  1493. }
  1494. return 0;
  1495. err:
  1496. while (i--)
  1497. kobject_put(&dev->vqs[i]->kobj);
  1498. kfree(dev->vqs);
  1499. dev->vqs = NULL;
  1500. return ret;
  1501. }
  1502. static struct vduse_dev *vduse_dev_create(void)
  1503. {
  1504. struct vduse_dev *dev = kzalloc_obj(*dev);
  1505. if (!dev)
  1506. return NULL;
  1507. mutex_init(&dev->lock);
  1508. mutex_init(&dev->domain_lock);
  1509. spin_lock_init(&dev->msg_lock);
  1510. INIT_LIST_HEAD(&dev->send_list);
  1511. INIT_LIST_HEAD(&dev->recv_list);
  1512. spin_lock_init(&dev->irq_lock);
  1513. init_rwsem(&dev->rwsem);
  1514. INIT_WORK(&dev->inject, vduse_dev_irq_inject);
  1515. init_waitqueue_head(&dev->waitq);
  1516. return dev;
  1517. }
/*
 * Free the vduse_dev structure itself. Members that were allocated
 * separately are not released here.
 */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
  1522. static struct vduse_dev *vduse_find_dev(const char *name)
  1523. {
  1524. struct vduse_dev *dev;
  1525. int id;
  1526. idr_for_each_entry(&vduse_idr, dev, id)
  1527. if (!strcmp(dev->name, name))
  1528. return dev;
  1529. return NULL;
  1530. }
  1531. static int vduse_destroy_dev(char *name)
  1532. {
  1533. struct vduse_dev *dev = vduse_find_dev(name);
  1534. if (!dev)
  1535. return -EINVAL;
  1536. mutex_lock(&dev->lock);
  1537. if (dev->vdev || dev->connected) {
  1538. mutex_unlock(&dev->lock);
  1539. return -EBUSY;
  1540. }
  1541. dev->connected = true;
  1542. mutex_unlock(&dev->lock);
  1543. vduse_dev_reset(dev);
  1544. device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
  1545. idr_remove(&vduse_idr, dev->minor);
  1546. kvfree(dev->config);
  1547. vduse_dev_deinit_vqs(dev);
  1548. for (int i = 0; i < dev->nas; i++) {
  1549. if (dev->as[i].domain)
  1550. vduse_domain_destroy(dev->as[i].domain);
  1551. }
  1552. kfree(dev->as);
  1553. kfree(dev->name);
  1554. kfree(dev->groups);
  1555. vduse_dev_destroy(dev);
  1556. module_put(THIS_MODULE);
  1557. return 0;
  1558. }
  1559. static bool device_is_allowed(u32 device_id)
  1560. {
  1561. int i;
  1562. for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
  1563. if (allowed_device_id[i] == device_id)
  1564. return true;
  1565. return false;
  1566. }
  1567. static bool features_is_valid(struct vduse_dev_config *config)
  1568. {
  1569. if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
  1570. return false;
  1571. /* Now we only support read-only configuration space */
  1572. if ((config->device_id == VIRTIO_ID_BLOCK) &&
  1573. (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
  1574. return false;
  1575. else if ((config->device_id == VIRTIO_ID_NET) &&
  1576. (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
  1577. return false;
  1578. if ((config->device_id == VIRTIO_ID_NET) &&
  1579. !(config->features & BIT_ULL(VIRTIO_F_VERSION_1)))
  1580. return false;
  1581. return true;
  1582. }
  1583. static bool vduse_validate_config(struct vduse_dev_config *config,
  1584. u64 api_version)
  1585. {
  1586. if (!is_mem_zero((const char *)config->reserved,
  1587. sizeof(config->reserved)))
  1588. return false;
  1589. if (api_version < VDUSE_API_VERSION_1 &&
  1590. (config->ngroups || config->nas))
  1591. return false;
  1592. if (api_version >= VDUSE_API_VERSION_1) {
  1593. if (!config->ngroups || config->ngroups > VDUSE_DEV_MAX_GROUPS)
  1594. return false;
  1595. if (!config->nas || config->nas > VDUSE_DEV_MAX_AS)
  1596. return false;
  1597. }
  1598. if (config->vq_align > PAGE_SIZE)
  1599. return false;
  1600. if (config->config_size > PAGE_SIZE)
  1601. return false;
  1602. if (config->vq_num > 0xffff)
  1603. return false;
  1604. if (!config->name[0])
  1605. return false;
  1606. if (!device_is_allowed(config->device_id))
  1607. return false;
  1608. if (!features_is_valid(config))
  1609. return false;
  1610. return true;
  1611. }
  1612. static ssize_t msg_timeout_show(struct device *device,
  1613. struct device_attribute *attr, char *buf)
  1614. {
  1615. struct vduse_dev *dev = dev_get_drvdata(device);
  1616. return sysfs_emit(buf, "%u\n", dev->msg_timeout);
  1617. }
  1618. static ssize_t msg_timeout_store(struct device *device,
  1619. struct device_attribute *attr,
  1620. const char *buf, size_t count)
  1621. {
  1622. struct vduse_dev *dev = dev_get_drvdata(device);
  1623. int ret;
  1624. ret = kstrtouint(buf, 10, &dev->msg_timeout);
  1625. if (ret < 0)
  1626. return ret;
  1627. return count;
  1628. }
  1629. static DEVICE_ATTR_RW(msg_timeout);
  1630. static ssize_t bounce_size_show(struct device *device,
  1631. struct device_attribute *attr, char *buf)
  1632. {
  1633. struct vduse_dev *dev = dev_get_drvdata(device);
  1634. return sysfs_emit(buf, "%u\n", dev->bounce_size);
  1635. }
  1636. static ssize_t bounce_size_store(struct device *device,
  1637. struct device_attribute *attr,
  1638. const char *buf, size_t count)
  1639. {
  1640. struct vduse_dev *dev = dev_get_drvdata(device);
  1641. unsigned int bounce_size;
  1642. int ret;
  1643. ret = -EPERM;
  1644. mutex_lock(&dev->domain_lock);
  1645. /* Assuming that if the first domain is allocated, all are allocated */
  1646. if (dev->as[0].domain)
  1647. goto unlock;
  1648. ret = kstrtouint(buf, 10, &bounce_size);
  1649. if (ret < 0)
  1650. goto unlock;
  1651. ret = -EINVAL;
  1652. if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
  1653. bounce_size < VDUSE_MIN_BOUNCE_SIZE)
  1654. goto unlock;
  1655. dev->bounce_size = bounce_size & PAGE_MASK;
  1656. ret = count;
  1657. unlock:
  1658. mutex_unlock(&dev->domain_lock);
  1659. return ret;
  1660. }
  1661. static DEVICE_ATTR_RW(bounce_size);
/*
 * Device attributes (msg_timeout and bounce_size); the generated
 * vduse_dev_groups is passed to device_create_with_groups() in
 * vduse_create_dev().
 */
static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	&dev_attr_bounce_size.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
  1668. static int vduse_create_dev(struct vduse_dev_config *config,
  1669. void *config_buf, u64 api_version)
  1670. {
  1671. int ret;
  1672. struct vduse_dev *dev;
  1673. ret = -EPERM;
  1674. if ((config->device_id == VIRTIO_ID_NET) && !capable(CAP_NET_ADMIN))
  1675. goto err;
  1676. ret = -EEXIST;
  1677. if (vduse_find_dev(config->name))
  1678. goto err;
  1679. ret = -ENOMEM;
  1680. dev = vduse_dev_create();
  1681. if (!dev)
  1682. goto err;
  1683. dev->api_version = api_version;
  1684. dev->device_features = config->features;
  1685. dev->device_id = config->device_id;
  1686. dev->vendor_id = config->vendor_id;
  1687. dev->nas = (dev->api_version < VDUSE_API_VERSION_1) ? 1 : config->nas;
  1688. dev->as = kzalloc_objs(dev->as[0], dev->nas);
  1689. if (!dev->as)
  1690. goto err_as;
  1691. for (int i = 0; i < dev->nas; i++)
  1692. mutex_init(&dev->as[i].mem_lock);
  1693. dev->ngroups = (dev->api_version < VDUSE_API_VERSION_1)
  1694. ? 1
  1695. : config->ngroups;
  1696. dev->groups = kzalloc_objs(dev->groups[0], dev->ngroups);
  1697. if (!dev->groups)
  1698. goto err_vq_groups;
  1699. for (u32 i = 0; i < dev->ngroups; ++i) {
  1700. dev->groups[i].dev = dev;
  1701. rwlock_init(&dev->groups[i].as_lock);
  1702. dev->groups[i].as = &dev->as[0];
  1703. }
  1704. dev->name = kstrdup(config->name, GFP_KERNEL);
  1705. if (!dev->name)
  1706. goto err_str;
  1707. dev->bounce_size = VDUSE_BOUNCE_SIZE;
  1708. dev->config = config_buf;
  1709. dev->config_size = config->config_size;
  1710. ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
  1711. if (ret < 0)
  1712. goto err_idr;
  1713. dev->minor = ret;
  1714. dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
  1715. dev->dev = device_create_with_groups(&vduse_class, NULL,
  1716. MKDEV(MAJOR(vduse_major), dev->minor),
  1717. dev, vduse_dev_groups, "%s", config->name);
  1718. if (IS_ERR(dev->dev)) {
  1719. ret = PTR_ERR(dev->dev);
  1720. goto err_dev;
  1721. }
  1722. ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
  1723. if (ret)
  1724. goto err_vqs;
  1725. __module_get(THIS_MODULE);
  1726. return 0;
  1727. err_vqs:
  1728. device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
  1729. err_dev:
  1730. idr_remove(&vduse_idr, dev->minor);
  1731. err_idr:
  1732. kfree(dev->name);
  1733. err_str:
  1734. kfree(dev->groups);
  1735. err_vq_groups:
  1736. kfree(dev->as);
  1737. err_as:
  1738. vduse_dev_destroy(dev);
  1739. err:
  1740. return ret;
  1741. }
  1742. static long vduse_ioctl(struct file *file, unsigned int cmd,
  1743. unsigned long arg)
  1744. {
  1745. int ret;
  1746. void __user *argp = (void __user *)arg;
  1747. struct vduse_control *control = file->private_data;
  1748. mutex_lock(&vduse_lock);
  1749. switch (cmd) {
  1750. case VDUSE_GET_API_VERSION:
  1751. if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
  1752. control->api_version = VDUSE_API_VERSION_1;
  1753. ret = put_user(control->api_version, (u64 __user *)argp);
  1754. break;
  1755. case VDUSE_SET_API_VERSION: {
  1756. u64 api_version;
  1757. ret = -EFAULT;
  1758. if (get_user(api_version, (u64 __user *)argp))
  1759. break;
  1760. ret = -EINVAL;
  1761. if (api_version > VDUSE_API_VERSION_1)
  1762. break;
  1763. ret = 0;
  1764. control->api_version = api_version;
  1765. break;
  1766. }
  1767. case VDUSE_CREATE_DEV: {
  1768. struct vduse_dev_config config;
  1769. unsigned long size = offsetof(struct vduse_dev_config, config);
  1770. void *buf;
  1771. ret = -EFAULT;
  1772. if (copy_from_user(&config, argp, size))
  1773. break;
  1774. ret = -EINVAL;
  1775. if (control->api_version == VDUSE_API_VERSION_NOT_ASKED)
  1776. control->api_version = VDUSE_API_VERSION;
  1777. if (!vduse_validate_config(&config, control->api_version))
  1778. break;
  1779. buf = vmemdup_user(argp + size, config.config_size);
  1780. if (IS_ERR(buf)) {
  1781. ret = PTR_ERR(buf);
  1782. break;
  1783. }
  1784. config.name[VDUSE_NAME_MAX - 1] = '\0';
  1785. ret = vduse_create_dev(&config, buf, control->api_version);
  1786. if (ret)
  1787. kvfree(buf);
  1788. break;
  1789. }
  1790. case VDUSE_DESTROY_DEV: {
  1791. char name[VDUSE_NAME_MAX];
  1792. ret = -EFAULT;
  1793. if (copy_from_user(name, argp, VDUSE_NAME_MAX))
  1794. break;
  1795. name[VDUSE_NAME_MAX - 1] = '\0';
  1796. ret = vduse_destroy_dev(name);
  1797. break;
  1798. }
  1799. default:
  1800. ret = -EINVAL;
  1801. break;
  1802. }
  1803. mutex_unlock(&vduse_lock);
  1804. return ret;
  1805. }
  1806. static int vduse_release(struct inode *inode, struct file *file)
  1807. {
  1808. struct vduse_control *control = file->private_data;
  1809. kfree(control);
  1810. return 0;
  1811. }
  1812. static int vduse_open(struct inode *inode, struct file *file)
  1813. {
  1814. struct vduse_control *control;
  1815. control = kmalloc_obj(struct vduse_control);
  1816. if (!control)
  1817. return -ENOMEM;
  1818. control->api_version = VDUSE_API_VERSION_NOT_ASKED;
  1819. file->private_data = control;
  1820. return 0;
  1821. }
  1822. static const struct file_operations vduse_ctrl_fops = {
  1823. .owner = THIS_MODULE,
  1824. .open = vduse_open,
  1825. .release = vduse_release,
  1826. .unlocked_ioctl = vduse_ioctl,
  1827. .compat_ioctl = compat_ptr_ioctl,
  1828. .llseek = noop_llseek,
  1829. };
/*
 * Management device wrapper: pairs the vdpa management device with the
 * struct device it is registered under. Freed from the device release
 * callback, vduse_mgmtdev_release().
 */
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

/* The single instance, allocated in vduse_mgmtdev_init(). */
static struct vduse_mgmt_dev *vduse_mgmt;
  1835. static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
  1836. {
  1837. struct vduse_vdpa *vdev;
  1838. if (dev->vdev)
  1839. return -EEXIST;
  1840. vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
  1841. &vduse_vdpa_config_ops, &vduse_map_ops,
  1842. dev->ngroups, dev->nas, name, true);
  1843. if (IS_ERR(vdev))
  1844. return PTR_ERR(vdev);
  1845. dev->vdev = vdev;
  1846. vdev->dev = dev;
  1847. vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
  1848. return 0;
  1849. }
  1850. static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
  1851. const struct vdpa_dev_set_config *config)
  1852. {
  1853. struct vduse_dev *dev;
  1854. size_t domain_bounce_size;
  1855. int ret, i;
  1856. mutex_lock(&vduse_lock);
  1857. dev = vduse_find_dev(name);
  1858. if (!dev || !vduse_dev_is_ready(dev)) {
  1859. mutex_unlock(&vduse_lock);
  1860. return -EINVAL;
  1861. }
  1862. ret = vduse_dev_init_vdpa(dev, name);
  1863. mutex_unlock(&vduse_lock);
  1864. if (ret)
  1865. return ret;
  1866. mutex_lock(&dev->domain_lock);
  1867. ret = 0;
  1868. domain_bounce_size = dev->bounce_size / dev->nas;
  1869. for (i = 0; i < dev->nas; ++i) {
  1870. dev->as[i].domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
  1871. domain_bounce_size);
  1872. if (!dev->as[i].domain) {
  1873. ret = -ENOMEM;
  1874. goto err;
  1875. }
  1876. }
  1877. mutex_unlock(&dev->domain_lock);
  1878. ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
  1879. if (ret)
  1880. goto err_register;
  1881. return 0;
  1882. err_register:
  1883. mutex_lock(&dev->domain_lock);
  1884. err:
  1885. for (int j = 0; j < i; j++) {
  1886. if (dev->as[j].domain) {
  1887. vduse_domain_destroy(dev->as[j].domain);
  1888. dev->as[j].domain = NULL;
  1889. }
  1890. }
  1891. mutex_unlock(&dev->domain_lock);
  1892. put_device(&dev->vdev->vdpa.dev);
  1893. return ret;
  1894. }
/*
 * Management "dev_del" callback: unregister the given vdpa device.
 * @mdev is not used here.
 */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
  1899. static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
  1900. .dev_add = vdpa_dev_add,
  1901. .dev_del = vdpa_dev_del,
  1902. };
/*
 * Device IDs advertised by the management device: block and net, each
 * paired with VIRTIO_DEV_ANY_ID. The all-zero entry ends the table.
 */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
  1908. static void vduse_mgmtdev_release(struct device *dev)
  1909. {
  1910. struct vduse_mgmt_dev *mgmt_dev;
  1911. mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
  1912. kfree(mgmt_dev);
  1913. }
  1914. static int vduse_mgmtdev_init(void)
  1915. {
  1916. int ret;
  1917. vduse_mgmt = kzalloc_obj(*vduse_mgmt);
  1918. if (!vduse_mgmt)
  1919. return -ENOMEM;
  1920. ret = dev_set_name(&vduse_mgmt->dev, "vduse");
  1921. if (ret) {
  1922. kfree(vduse_mgmt);
  1923. return ret;
  1924. }
  1925. vduse_mgmt->dev.release = vduse_mgmtdev_release;
  1926. ret = device_register(&vduse_mgmt->dev);
  1927. if (ret)
  1928. goto dev_reg_err;
  1929. vduse_mgmt->mgmt_dev.id_table = id_table;
  1930. vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
  1931. vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
  1932. ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
  1933. if (ret)
  1934. device_unregister(&vduse_mgmt->dev);
  1935. return ret;
  1936. dev_reg_err:
  1937. put_device(&vduse_mgmt->dev);
  1938. return ret;
  1939. }
/*
 * Undo vduse_mgmtdev_init(): unregister the vdpa management device first,
 * then the struct device it was registered on.
 */
static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
  1945. static int vduse_init(void)
  1946. {
  1947. int ret;
  1948. struct device *dev;
  1949. ret = class_register(&vduse_class);
  1950. if (ret)
  1951. return ret;
  1952. ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
  1953. if (ret)
  1954. goto err_chardev_region;
  1955. /* /dev/vduse/control */
  1956. cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
  1957. vduse_ctrl_cdev.owner = THIS_MODULE;
  1958. ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
  1959. if (ret)
  1960. goto err_ctrl_cdev;
  1961. dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
  1962. if (IS_ERR(dev)) {
  1963. ret = PTR_ERR(dev);
  1964. goto err_device;
  1965. }
  1966. /* /dev/vduse/$DEVICE */
  1967. cdev_init(&vduse_cdev, &vduse_dev_fops);
  1968. vduse_cdev.owner = THIS_MODULE;
  1969. ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
  1970. VDUSE_DEV_MAX - 1);
  1971. if (ret)
  1972. goto err_cdev;
  1973. ret = -ENOMEM;
  1974. vduse_irq_wq = alloc_workqueue("vduse-irq",
  1975. WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
  1976. if (!vduse_irq_wq)
  1977. goto err_wq;
  1978. vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound",
  1979. WQ_HIGHPRI | WQ_PERCPU, 0);
  1980. if (!vduse_irq_bound_wq)
  1981. goto err_bound_wq;
  1982. ret = vduse_domain_init();
  1983. if (ret)
  1984. goto err_domain;
  1985. ret = vduse_mgmtdev_init();
  1986. if (ret)
  1987. goto err_mgmtdev;
  1988. return 0;
  1989. err_mgmtdev:
  1990. vduse_domain_exit();
  1991. err_domain:
  1992. destroy_workqueue(vduse_irq_bound_wq);
  1993. err_bound_wq:
  1994. destroy_workqueue(vduse_irq_wq);
  1995. err_wq:
  1996. cdev_del(&vduse_cdev);
  1997. err_cdev:
  1998. device_destroy(&vduse_class, vduse_major);
  1999. err_device:
  2000. cdev_del(&vduse_ctrl_cdev);
  2001. err_ctrl_cdev:
  2002. unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
  2003. err_chardev_region:
  2004. class_unregister(&vduse_class);
  2005. return ret;
  2006. }
  2007. module_init(vduse_init);
/*
 * Module teardown: undo vduse_init() in reverse order of setup, then
 * destroy the IDR used for device minor numbers.
 */
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	/* Removes the control device created at vduse_major in vduse_init(). */
	device_destroy(&vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_unregister(&vduse_class);
	idr_destroy(&vduse_idr);
}
module_exit(vduse_exit);
  2022. MODULE_LICENSE(DRV_LICENSE);
  2023. MODULE_AUTHOR(DRV_AUTHOR);
  2024. MODULE_DESCRIPTION(DRV_DESC);