vdpa.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2018-2020 Intel Corporation.
  4. * Copyright (C) 2020 Red Hat, Inc.
  5. *
  6. * Author: Tiwei Bie <tiwei.bie@intel.com>
  7. * Jason Wang <jasowang@redhat.com>
  8. *
  9. * Thanks Michael S. Tsirkin for the valuable comments and
  10. * suggestions. And thanks to Cunming Liang and Zhihong Wang for all
  11. * their supports.
  12. */
  13. #include <linux/kernel.h>
  14. #include <linux/module.h>
  15. #include <linux/cdev.h>
  16. #include <linux/device.h>
  17. #include <linux/mm.h>
  18. #include <linux/slab.h>
  19. #include <linux/iommu.h>
  20. #include <linux/uuid.h>
  21. #include <linux/vdpa.h>
  22. #include <linux/nospec.h>
  23. #include <linux/vhost.h>
  24. #include "vhost.h"
  25. enum {
  26. VHOST_VDPA_BACKEND_FEATURES =
  27. (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  28. (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
  29. (1ULL << VHOST_BACKEND_F_IOTLB_ASID),
  30. };
  31. #define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  32. #define VHOST_VDPA_IOTLB_BUCKETS 16
/*
 * One address space of a vhost-vdpa device. Instances live in the
 * vhost_vdpa::as hash table and are looked up by @id.
 */
struct vhost_vdpa_as {
	/* Linkage into one bucket of vhost_vdpa::as. */
	struct hlist_node hash_link;
	/* IOTLB holding the mappings of this address space. */
	struct vhost_iotlb iotlb;
	/* Address space identifier (ASID). */
	u32 id;
};
/* Per-device state of a vhost-vdpa character device. */
struct vhost_vdpa {
	/* Embedded vhost device; container_of() on it recovers this struct. */
	struct vhost_dev vdev;
	/* IOMMU domain used when the parent has neither dma_map nor set_map. */
	struct iommu_domain *domain;
	/* Array of virtqueues, indexed by queue index. */
	struct vhost_virtqueue *vqs;
	/* NOTE(review): waited on/completed outside this excerpt - confirm use. */
	struct completion completion;
	/* Underlying vDPA device whose config ops are invoked. */
	struct vdpa_device *vdpa;
	/* Hash table of address spaces, bucket = asid % VHOST_VDPA_IOTLB_BUCKETS. */
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	/* Device used for log messages (dev_info). */
	struct device dev;
	/* NOTE(review): character device; registration is outside this excerpt. */
	struct cdev cdev;
	/* NOTE(review): presumably guards against concurrent opens - confirm. */
	atomic_t opened;
	/* Number of entries in @vqs; queue indexes from userspace are bounded by it. */
	u32 nvqs;
	/* NOTE(review): presumably the virtio device id - confirm. */
	int virtio_id;
	/* NOTE(review): presumably the allocated char-device minor - confirm. */
	int minor;
	/* Eventfd signalled by the config-change callback; NULL when unbound. */
	struct eventfd_ctx *config_ctx;
	/* Cleared by vhost_vdpa_reset(); NOTE(review): batching logic not shown here. */
	int in_batch;
	/* IOVA range reported to userspace by VHOST_VDPA_GET_IOVA_RANGE. */
	struct vdpa_iova_range range;
	/* NOTE(review): presumably the ASID of the in-flight batch - confirm. */
	u32 batch_asid;
	/* True after a successful suspend; cleared on resume and on reset. */
	bool suspended;
};
/*
 * NOTE(review): the users of these two objects are outside this excerpt;
 * presumably the IDA hands out device minors and vhost_vdpa_major holds the
 * allocated char-device region - confirm.
 */
static DEFINE_IDA(vhost_vdpa_ida);
static dev_t vhost_vdpa_major;

/* Forward declaration: vhost_vdpa_remove_as() calls this before its definition. */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);
  62. static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
  63. {
  64. struct vhost_vdpa_as *as = container_of(iotlb, struct
  65. vhost_vdpa_as, iotlb);
  66. return as->id;
  67. }
  68. static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
  69. {
  70. struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  71. struct vhost_vdpa_as *as;
  72. hlist_for_each_entry(as, head, hash_link)
  73. if (as->id == asid)
  74. return as;
  75. return NULL;
  76. }
  77. static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
  78. {
  79. struct vhost_vdpa_as *as = asid_to_as(v, asid);
  80. if (!as)
  81. return NULL;
  82. return &as->iotlb;
  83. }
  84. static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
  85. {
  86. struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  87. struct vhost_vdpa_as *as;
  88. if (asid_to_as(v, asid))
  89. return NULL;
  90. if (asid >= v->vdpa->nas)
  91. return NULL;
  92. as = kmalloc_obj(*as);
  93. if (!as)
  94. return NULL;
  95. vhost_iotlb_init(&as->iotlb, 0, 0);
  96. as->id = asid;
  97. hlist_add_head(&as->hash_link, head);
  98. return as;
  99. }
  100. static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
  101. u32 asid)
  102. {
  103. struct vhost_vdpa_as *as = asid_to_as(v, asid);
  104. if (as)
  105. return as;
  106. return vhost_vdpa_alloc_as(v, asid);
  107. }
  108. static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
  109. {
  110. struct vdpa_device *vdpa = v->vdpa;
  111. const struct vdpa_config_ops *ops = vdpa->config;
  112. if (ops->reset_map)
  113. ops->reset_map(vdpa, asid);
  114. }
  115. static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
  116. {
  117. struct vhost_vdpa_as *as = asid_to_as(v, asid);
  118. if (!as)
  119. return -EINVAL;
  120. hlist_del(&as->hash_link);
  121. vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
  122. /*
  123. * Devices with vendor specific IOMMU may need to restore
  124. * iotlb to the initial or default state, which cannot be
  125. * cleaned up in the all range unmap call above. Give them
  126. * a chance to clean up or reset the map to the desired
  127. * state.
  128. */
  129. vhost_vdpa_reset_map(v, asid);
  130. kfree(as);
  131. return 0;
  132. }
  133. static void handle_vq_kick(struct vhost_work *work)
  134. {
  135. struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  136. poll.work);
  137. struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
  138. const struct vdpa_config_ops *ops = v->vdpa->config;
  139. ops->kick_vq(v->vdpa, vq - v->vqs);
  140. }
  141. static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
  142. {
  143. struct vhost_virtqueue *vq = private;
  144. struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  145. if (call_ctx)
  146. eventfd_signal(call_ctx);
  147. return IRQ_HANDLED;
  148. }
  149. static irqreturn_t vhost_vdpa_config_cb(void *private)
  150. {
  151. struct vhost_vdpa *v = private;
  152. struct eventfd_ctx *config_ctx = v->config_ctx;
  153. if (config_ctx)
  154. eventfd_signal(config_ctx);
  155. return IRQ_HANDLED;
  156. }
  157. static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
  158. {
  159. struct vhost_virtqueue *vq = &v->vqs[qid];
  160. const struct vdpa_config_ops *ops = v->vdpa->config;
  161. struct vdpa_device *vdpa = v->vdpa;
  162. int ret, irq;
  163. if (!ops->get_vq_irq)
  164. return;
  165. irq = ops->get_vq_irq(vdpa, qid);
  166. if (irq < 0)
  167. return;
  168. if (!vq->call_ctx.ctx)
  169. return;
  170. ret = irq_bypass_register_producer(&vq->call_ctx.producer,
  171. vq->call_ctx.ctx, irq);
  172. if (unlikely(ret))
  173. dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n",
  174. qid, vq->call_ctx.ctx, ret);
  175. }
  176. static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
  177. {
  178. struct vhost_virtqueue *vq = &v->vqs[qid];
  179. irq_bypass_unregister_producer(&vq->call_ctx.producer);
  180. }
  181. static int _compat_vdpa_reset(struct vhost_vdpa *v)
  182. {
  183. struct vdpa_device *vdpa = v->vdpa;
  184. u32 flags = 0;
  185. v->suspended = false;
  186. if (v->vdev.vqs) {
  187. flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
  188. VHOST_BACKEND_F_IOTLB_PERSIST) ?
  189. VDPA_RESET_F_CLEAN_MAP : 0;
  190. }
  191. return vdpa_reset(vdpa, flags);
  192. }
  193. static int vhost_vdpa_reset(struct vhost_vdpa *v)
  194. {
  195. v->in_batch = 0;
  196. return _compat_vdpa_reset(v);
  197. }
  198. static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
  199. {
  200. struct vdpa_device *vdpa = v->vdpa;
  201. const struct vdpa_config_ops *ops = vdpa->config;
  202. if (!vdpa->use_va || !ops->bind_mm)
  203. return 0;
  204. return ops->bind_mm(vdpa, v->vdev.mm);
  205. }
  206. static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
  207. {
  208. struct vdpa_device *vdpa = v->vdpa;
  209. const struct vdpa_config_ops *ops = vdpa->config;
  210. if (!vdpa->use_va || !ops->unbind_mm)
  211. return;
  212. ops->unbind_mm(vdpa);
  213. }
  214. static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
  215. {
  216. struct vdpa_device *vdpa = v->vdpa;
  217. const struct vdpa_config_ops *ops = vdpa->config;
  218. u32 device_id;
  219. device_id = ops->get_device_id(vdpa);
  220. if (copy_to_user(argp, &device_id, sizeof(device_id)))
  221. return -EFAULT;
  222. return 0;
  223. }
  224. static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
  225. {
  226. struct vdpa_device *vdpa = v->vdpa;
  227. const struct vdpa_config_ops *ops = vdpa->config;
  228. u8 status;
  229. status = ops->get_status(vdpa);
  230. if (copy_to_user(statusp, &status, sizeof(status)))
  231. return -EFAULT;
  232. return 0;
  233. }
  234. static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
  235. {
  236. struct vdpa_device *vdpa = v->vdpa;
  237. const struct vdpa_config_ops *ops = vdpa->config;
  238. u8 status, status_old;
  239. u32 nvqs = v->nvqs;
  240. int ret;
  241. u16 i;
  242. if (copy_from_user(&status, statusp, sizeof(status)))
  243. return -EFAULT;
  244. status_old = ops->get_status(vdpa);
  245. /*
  246. * Userspace shouldn't remove status bits unless reset the
  247. * status to 0.
  248. */
  249. if (status != 0 && (status_old & ~status) != 0)
  250. return -EINVAL;
  251. if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
  252. for (i = 0; i < nvqs; i++)
  253. vhost_vdpa_unsetup_vq_irq(v, i);
  254. if (status == 0) {
  255. ret = _compat_vdpa_reset(v);
  256. if (ret)
  257. return ret;
  258. } else
  259. vdpa_set_status(vdpa, status);
  260. if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
  261. for (i = 0; i < nvqs; i++)
  262. vhost_vdpa_setup_vq_irq(v, i);
  263. return 0;
  264. }
  265. static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
  266. struct vhost_vdpa_config *c)
  267. {
  268. struct vdpa_device *vdpa = v->vdpa;
  269. size_t size = vdpa->config->get_config_size(vdpa);
  270. if (c->len == 0 || c->off > size)
  271. return -EINVAL;
  272. if (c->len > size - c->off)
  273. return -E2BIG;
  274. return 0;
  275. }
  276. static long vhost_vdpa_get_config(struct vhost_vdpa *v,
  277. struct vhost_vdpa_config __user *c)
  278. {
  279. struct vdpa_device *vdpa = v->vdpa;
  280. struct vhost_vdpa_config config;
  281. unsigned long size = offsetof(struct vhost_vdpa_config, buf);
  282. u8 *buf;
  283. if (copy_from_user(&config, c, size))
  284. return -EFAULT;
  285. if (vhost_vdpa_config_validate(v, &config))
  286. return -EINVAL;
  287. buf = kvzalloc(config.len, GFP_KERNEL);
  288. if (!buf)
  289. return -ENOMEM;
  290. vdpa_get_config(vdpa, config.off, buf, config.len);
  291. if (copy_to_user(c->buf, buf, config.len)) {
  292. kvfree(buf);
  293. return -EFAULT;
  294. }
  295. kvfree(buf);
  296. return 0;
  297. }
  298. static long vhost_vdpa_set_config(struct vhost_vdpa *v,
  299. struct vhost_vdpa_config __user *c)
  300. {
  301. struct vdpa_device *vdpa = v->vdpa;
  302. struct vhost_vdpa_config config;
  303. unsigned long size = offsetof(struct vhost_vdpa_config, buf);
  304. u8 *buf;
  305. if (copy_from_user(&config, c, size))
  306. return -EFAULT;
  307. if (vhost_vdpa_config_validate(v, &config))
  308. return -EINVAL;
  309. buf = vmemdup_user(c->buf, config.len);
  310. if (IS_ERR(buf))
  311. return PTR_ERR(buf);
  312. vdpa_set_config(vdpa, config.off, buf, config.len);
  313. kvfree(buf);
  314. return 0;
  315. }
  316. static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
  317. {
  318. struct vdpa_device *vdpa = v->vdpa;
  319. const struct vdpa_config_ops *ops = vdpa->config;
  320. return ops->suspend;
  321. }
  322. static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
  323. {
  324. struct vdpa_device *vdpa = v->vdpa;
  325. const struct vdpa_config_ops *ops = vdpa->config;
  326. return ops->resume;
  327. }
  328. static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
  329. {
  330. struct vdpa_device *vdpa = v->vdpa;
  331. const struct vdpa_config_ops *ops = vdpa->config;
  332. return ops->get_vq_desc_group;
  333. }
  334. static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
  335. {
  336. struct vdpa_device *vdpa = v->vdpa;
  337. const struct vdpa_config_ops *ops = vdpa->config;
  338. u64 features;
  339. features = ops->get_device_features(vdpa);
  340. if (copy_to_user(featurep, &features, sizeof(features)))
  341. return -EFAULT;
  342. return 0;
  343. }
  344. static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
  345. {
  346. struct vdpa_device *vdpa = v->vdpa;
  347. const struct vdpa_config_ops *ops = vdpa->config;
  348. if (!ops->get_backend_features)
  349. return 0;
  350. else
  351. return ops->get_backend_features(vdpa);
  352. }
  353. static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
  354. {
  355. struct vdpa_device *vdpa = v->vdpa;
  356. const struct vdpa_config_ops *ops = vdpa->config;
  357. return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
  358. vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
  359. }
  360. static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
  361. {
  362. struct vdpa_device *vdpa = v->vdpa;
  363. const struct vdpa_config_ops *ops = vdpa->config;
  364. struct vhost_dev *d = &v->vdev;
  365. u64 actual_features;
  366. u64 features;
  367. int i;
  368. /*
  369. * It's not allowed to change the features after they have
  370. * been negotiated.
  371. */
  372. if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
  373. return -EBUSY;
  374. if (copy_from_user(&features, featurep, sizeof(features)))
  375. return -EFAULT;
  376. if (vdpa_set_features(vdpa, features))
  377. return -EINVAL;
  378. /* let the vqs know what has been configured */
  379. actual_features = ops->get_driver_features(vdpa);
  380. for (i = 0; i < d->nvqs; ++i) {
  381. struct vhost_virtqueue *vq = d->vqs[i];
  382. mutex_lock(&vq->mutex);
  383. vq->acked_features = actual_features;
  384. mutex_unlock(&vq->mutex);
  385. }
  386. return 0;
  387. }
  388. static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
  389. {
  390. struct vdpa_device *vdpa = v->vdpa;
  391. const struct vdpa_config_ops *ops = vdpa->config;
  392. u16 num;
  393. num = ops->get_vq_num_max(vdpa);
  394. if (copy_to_user(argp, &num, sizeof(num)))
  395. return -EFAULT;
  396. return 0;
  397. }
  398. static void vhost_vdpa_config_put(struct vhost_vdpa *v)
  399. {
  400. if (v->config_ctx) {
  401. eventfd_ctx_put(v->config_ctx);
  402. v->config_ctx = NULL;
  403. }
  404. }
  405. static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
  406. {
  407. struct vdpa_callback cb;
  408. int fd;
  409. struct eventfd_ctx *ctx;
  410. cb.callback = vhost_vdpa_config_cb;
  411. cb.private = v;
  412. if (copy_from_user(&fd, argp, sizeof(fd)))
  413. return -EFAULT;
  414. ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
  415. swap(ctx, v->config_ctx);
  416. if (!IS_ERR_OR_NULL(ctx))
  417. eventfd_ctx_put(ctx);
  418. if (IS_ERR(v->config_ctx)) {
  419. long ret = PTR_ERR(v->config_ctx);
  420. v->config_ctx = NULL;
  421. return ret;
  422. }
  423. v->vdpa->config->set_config_cb(v->vdpa, &cb);
  424. return 0;
  425. }
  426. static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
  427. {
  428. struct vhost_vdpa_iova_range range = {
  429. .first = v->range.first,
  430. .last = v->range.last,
  431. };
  432. if (copy_to_user(argp, &range, sizeof(range)))
  433. return -EFAULT;
  434. return 0;
  435. }
  436. static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
  437. {
  438. struct vdpa_device *vdpa = v->vdpa;
  439. const struct vdpa_config_ops *ops = vdpa->config;
  440. u32 size;
  441. size = ops->get_config_size(vdpa);
  442. if (copy_to_user(argp, &size, sizeof(size)))
  443. return -EFAULT;
  444. return 0;
  445. }
  446. static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
  447. {
  448. struct vdpa_device *vdpa = v->vdpa;
  449. if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
  450. return -EFAULT;
  451. return 0;
  452. }
  453. /* After a successful return of ioctl the device must not process more
  454. * virtqueue descriptors. The device can answer to read or writes of config
  455. * fields as if it were not suspended. In particular, writing to "queue_enable"
  456. * with a value of 1 will not make the device start processing buffers.
  457. */
  458. static long vhost_vdpa_suspend(struct vhost_vdpa *v)
  459. {
  460. struct vdpa_device *vdpa = v->vdpa;
  461. const struct vdpa_config_ops *ops = vdpa->config;
  462. int ret;
  463. if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
  464. return 0;
  465. if (!ops->suspend)
  466. return -EOPNOTSUPP;
  467. ret = ops->suspend(vdpa);
  468. if (!ret)
  469. v->suspended = true;
  470. return ret;
  471. }
  472. /* After a successful return of this ioctl the device resumes processing
  473. * virtqueue descriptors. The device becomes fully operational the same way it
  474. * was before it was suspended.
  475. */
  476. static long vhost_vdpa_resume(struct vhost_vdpa *v)
  477. {
  478. struct vdpa_device *vdpa = v->vdpa;
  479. const struct vdpa_config_ops *ops = vdpa->config;
  480. int ret;
  481. if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
  482. return 0;
  483. if (!ops->resume)
  484. return -EOPNOTSUPP;
  485. ret = ops->resume(vdpa);
  486. if (!ret)
  487. v->suspended = false;
  488. return ret;
  489. }
  490. static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
  491. void __user *argp)
  492. {
  493. struct vdpa_device *vdpa = v->vdpa;
  494. const struct vdpa_config_ops *ops = vdpa->config;
  495. struct vdpa_vq_state vq_state;
  496. struct vdpa_callback cb;
  497. struct vhost_virtqueue *vq;
  498. struct vhost_vring_state s;
  499. u32 idx;
  500. long r;
  501. r = get_user(idx, (u32 __user *)argp);
  502. if (r < 0)
  503. return r;
  504. if (idx >= v->nvqs)
  505. return -ENOBUFS;
  506. idx = array_index_nospec(idx, v->nvqs);
  507. vq = &v->vqs[idx];
  508. switch (cmd) {
  509. case VHOST_VDPA_SET_VRING_ENABLE:
  510. if (copy_from_user(&s, argp, sizeof(s)))
  511. return -EFAULT;
  512. ops->set_vq_ready(vdpa, idx, s.num);
  513. return 0;
  514. case VHOST_VDPA_GET_VRING_GROUP:
  515. if (!ops->get_vq_group)
  516. return -EOPNOTSUPP;
  517. s.index = idx;
  518. s.num = ops->get_vq_group(vdpa, idx);
  519. if (s.num >= vdpa->ngroups)
  520. return -EIO;
  521. else if (copy_to_user(argp, &s, sizeof(s)))
  522. return -EFAULT;
  523. return 0;
  524. case VHOST_VDPA_GET_VRING_DESC_GROUP:
  525. if (!vhost_vdpa_has_desc_group(v))
  526. return -EOPNOTSUPP;
  527. s.index = idx;
  528. s.num = ops->get_vq_desc_group(vdpa, idx);
  529. if (s.num >= vdpa->ngroups)
  530. return -EIO;
  531. else if (copy_to_user(argp, &s, sizeof(s)))
  532. return -EFAULT;
  533. return 0;
  534. case VHOST_VDPA_SET_GROUP_ASID:
  535. if (copy_from_user(&s, argp, sizeof(s)))
  536. return -EFAULT;
  537. if (idx >= vdpa->ngroups || s.num >= vdpa->nas)
  538. return -EINVAL;
  539. if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK)
  540. return -EBUSY;
  541. if (!ops->set_group_asid)
  542. return -EOPNOTSUPP;
  543. return ops->set_group_asid(vdpa, idx, s.num);
  544. case VHOST_VDPA_GET_VRING_SIZE:
  545. if (!ops->get_vq_size)
  546. return -EOPNOTSUPP;
  547. s.index = idx;
  548. s.num = ops->get_vq_size(vdpa, idx);
  549. if (copy_to_user(argp, &s, sizeof(s)))
  550. return -EFAULT;
  551. return 0;
  552. case VHOST_GET_VRING_BASE:
  553. r = ops->get_vq_state(v->vdpa, idx, &vq_state);
  554. if (r)
  555. return r;
  556. if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
  557. vq->last_avail_idx = vq_state.packed.last_avail_idx |
  558. (vq_state.packed.last_avail_counter << 15);
  559. vq->last_used_idx = vq_state.packed.last_used_idx |
  560. (vq_state.packed.last_used_counter << 15);
  561. } else {
  562. vq->last_avail_idx = vq_state.split.avail_index;
  563. }
  564. break;
  565. case VHOST_SET_VRING_CALL:
  566. if (vq->call_ctx.ctx) {
  567. if (ops->get_status(vdpa) &
  568. VIRTIO_CONFIG_S_DRIVER_OK)
  569. vhost_vdpa_unsetup_vq_irq(v, idx);
  570. }
  571. break;
  572. }
  573. r = vhost_vring_ioctl(&v->vdev, cmd, argp);
  574. if (r)
  575. return r;
  576. switch (cmd) {
  577. case VHOST_SET_VRING_ADDR:
  578. if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
  579. return -EINVAL;
  580. if (ops->set_vq_address(vdpa, idx,
  581. (u64)(uintptr_t)vq->desc,
  582. (u64)(uintptr_t)vq->avail,
  583. (u64)(uintptr_t)vq->used))
  584. r = -EINVAL;
  585. break;
  586. case VHOST_SET_VRING_BASE:
  587. if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
  588. return -EINVAL;
  589. if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
  590. vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
  591. vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
  592. vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
  593. vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
  594. } else {
  595. vq_state.split.avail_index = vq->last_avail_idx;
  596. }
  597. r = ops->set_vq_state(vdpa, idx, &vq_state);
  598. break;
  599. case VHOST_SET_VRING_CALL:
  600. if (vq->call_ctx.ctx) {
  601. cb.callback = vhost_vdpa_virtqueue_cb;
  602. cb.private = vq;
  603. cb.trigger = vq->call_ctx.ctx;
  604. if (ops->get_status(vdpa) &
  605. VIRTIO_CONFIG_S_DRIVER_OK)
  606. vhost_vdpa_setup_vq_irq(v, idx);
  607. } else {
  608. cb.callback = NULL;
  609. cb.private = NULL;
  610. cb.trigger = NULL;
  611. }
  612. ops->set_vq_cb(vdpa, idx, &cb);
  613. break;
  614. case VHOST_SET_VRING_NUM:
  615. ops->set_vq_num(vdpa, idx, vq->num);
  616. break;
  617. }
  618. return r;
  619. }
  620. static long vhost_vdpa_unlocked_ioctl(struct file *filep,
  621. unsigned int cmd, unsigned long arg)
  622. {
  623. struct vhost_vdpa *v = filep->private_data;
  624. struct vhost_dev *d = &v->vdev;
  625. void __user *argp = (void __user *)arg;
  626. u64 __user *featurep = argp;
  627. u64 features;
  628. long r = 0;
  629. if (cmd == VHOST_SET_BACKEND_FEATURES) {
  630. if (copy_from_user(&features, featurep, sizeof(features)))
  631. return -EFAULT;
  632. if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
  633. BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
  634. BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
  635. BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
  636. BIT_ULL(VHOST_BACKEND_F_RESUME) |
  637. BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
  638. return -EOPNOTSUPP;
  639. if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
  640. !vhost_vdpa_can_suspend(v))
  641. return -EOPNOTSUPP;
  642. if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
  643. !vhost_vdpa_can_resume(v))
  644. return -EOPNOTSUPP;
  645. if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
  646. !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
  647. return -EINVAL;
  648. if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
  649. !vhost_vdpa_has_desc_group(v))
  650. return -EOPNOTSUPP;
  651. if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
  652. !vhost_vdpa_has_persistent_map(v))
  653. return -EOPNOTSUPP;
  654. vhost_set_backend_features(&v->vdev, features);
  655. return 0;
  656. }
  657. mutex_lock(&d->mutex);
  658. switch (cmd) {
  659. case VHOST_VDPA_GET_DEVICE_ID:
  660. r = vhost_vdpa_get_device_id(v, argp);
  661. break;
  662. case VHOST_VDPA_GET_STATUS:
  663. r = vhost_vdpa_get_status(v, argp);
  664. break;
  665. case VHOST_VDPA_SET_STATUS:
  666. r = vhost_vdpa_set_status(v, argp);
  667. break;
  668. case VHOST_VDPA_GET_CONFIG:
  669. r = vhost_vdpa_get_config(v, argp);
  670. break;
  671. case VHOST_VDPA_SET_CONFIG:
  672. r = vhost_vdpa_set_config(v, argp);
  673. break;
  674. case VHOST_GET_FEATURES:
  675. r = vhost_vdpa_get_features(v, argp);
  676. break;
  677. case VHOST_SET_FEATURES:
  678. r = vhost_vdpa_set_features(v, argp);
  679. break;
  680. case VHOST_VDPA_GET_VRING_NUM:
  681. r = vhost_vdpa_get_vring_num(v, argp);
  682. break;
  683. case VHOST_VDPA_GET_GROUP_NUM:
  684. if (copy_to_user(argp, &v->vdpa->ngroups,
  685. sizeof(v->vdpa->ngroups)))
  686. r = -EFAULT;
  687. break;
  688. case VHOST_VDPA_GET_AS_NUM:
  689. if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
  690. r = -EFAULT;
  691. break;
  692. case VHOST_SET_LOG_BASE:
  693. case VHOST_SET_LOG_FD:
  694. r = -ENOIOCTLCMD;
  695. break;
  696. case VHOST_VDPA_SET_CONFIG_CALL:
  697. r = vhost_vdpa_set_config_call(v, argp);
  698. break;
  699. case VHOST_GET_BACKEND_FEATURES:
  700. features = VHOST_VDPA_BACKEND_FEATURES;
  701. if (vhost_vdpa_can_suspend(v))
  702. features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
  703. if (vhost_vdpa_can_resume(v))
  704. features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
  705. if (vhost_vdpa_has_desc_group(v))
  706. features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
  707. if (vhost_vdpa_has_persistent_map(v))
  708. features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
  709. features |= vhost_vdpa_get_backend_features(v);
  710. if (copy_to_user(featurep, &features, sizeof(features)))
  711. r = -EFAULT;
  712. break;
  713. case VHOST_VDPA_GET_IOVA_RANGE:
  714. r = vhost_vdpa_get_iova_range(v, argp);
  715. break;
  716. case VHOST_VDPA_GET_CONFIG_SIZE:
  717. r = vhost_vdpa_get_config_size(v, argp);
  718. break;
  719. case VHOST_VDPA_GET_VQS_COUNT:
  720. r = vhost_vdpa_get_vqs_count(v, argp);
  721. break;
  722. case VHOST_VDPA_SUSPEND:
  723. r = vhost_vdpa_suspend(v);
  724. break;
  725. case VHOST_VDPA_RESUME:
  726. r = vhost_vdpa_resume(v);
  727. break;
  728. default:
  729. r = vhost_dev_ioctl(&v->vdev, cmd, argp);
  730. if (r == -ENOIOCTLCMD)
  731. r = vhost_vdpa_vring_ioctl(v, cmd, argp);
  732. break;
  733. }
  734. if (r)
  735. goto out;
  736. switch (cmd) {
  737. case VHOST_SET_OWNER:
  738. r = vhost_vdpa_bind_mm(v);
  739. if (r)
  740. vhost_dev_reset_owner(d, NULL);
  741. break;
  742. }
  743. out:
  744. mutex_unlock(&d->mutex);
  745. return r;
  746. }
  747. static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
  748. struct vhost_iotlb_map *map, u32 asid)
  749. {
  750. struct vdpa_device *vdpa = v->vdpa;
  751. const struct vdpa_config_ops *ops = vdpa->config;
  752. if (ops->dma_map) {
  753. ops->dma_unmap(vdpa, asid, map->start, map->size);
  754. } else if (ops->set_map == NULL) {
  755. iommu_unmap(v->domain, map->start, map->size);
  756. }
  757. }
  758. static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
  759. u64 start, u64 last, u32 asid)
  760. {
  761. struct vhost_dev *dev = &v->vdev;
  762. struct vhost_iotlb_map *map;
  763. struct page *page;
  764. unsigned long pfn, pinned;
  765. while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
  766. pinned = PFN_DOWN(map->size);
  767. for (pfn = PFN_DOWN(map->addr);
  768. pinned > 0; pfn++, pinned--) {
  769. page = pfn_to_page(pfn);
  770. if (map->perm & VHOST_ACCESS_WO)
  771. set_page_dirty_lock(page);
  772. unpin_user_page(page);
  773. }
  774. atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
  775. vhost_vdpa_general_unmap(v, map, asid);
  776. vhost_iotlb_map_free(iotlb, map);
  777. }
  778. }
  779. static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
  780. u64 start, u64 last, u32 asid)
  781. {
  782. struct vhost_iotlb_map *map;
  783. struct vdpa_map_file *map_file;
  784. while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
  785. map_file = (struct vdpa_map_file *)map->opaque;
  786. fput(map_file->file);
  787. kfree(map_file);
  788. vhost_vdpa_general_unmap(v, map, asid);
  789. vhost_iotlb_map_free(iotlb, map);
  790. }
  791. }
  792. static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
  793. struct vhost_iotlb *iotlb, u64 start,
  794. u64 last, u32 asid)
  795. {
  796. struct vdpa_device *vdpa = v->vdpa;
  797. if (vdpa->use_va)
  798. return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
  799. return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
  800. }
  801. static int perm_to_iommu_flags(u32 perm)
  802. {
  803. int flags = 0;
  804. switch (perm) {
  805. case VHOST_ACCESS_WO:
  806. flags |= IOMMU_WRITE;
  807. break;
  808. case VHOST_ACCESS_RO:
  809. flags |= IOMMU_READ;
  810. break;
  811. case VHOST_ACCESS_RW:
  812. flags |= (IOMMU_WRITE | IOMMU_READ);
  813. break;
  814. default:
  815. WARN(1, "invalidate vhost IOTLB permission\n");
  816. break;
  817. }
  818. return flags | IOMMU_CACHE;
  819. }
  820. static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
  821. u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
  822. {
  823. struct vhost_dev *dev = &v->vdev;
  824. struct vdpa_device *vdpa = v->vdpa;
  825. const struct vdpa_config_ops *ops = vdpa->config;
  826. u32 asid = iotlb_to_asid(iotlb);
  827. int r = 0;
  828. r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
  829. pa, perm, opaque);
  830. if (r)
  831. return r;
  832. if (ops->dma_map) {
  833. r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
  834. } else if (ops->set_map) {
  835. if (!v->in_batch)
  836. r = ops->set_map(vdpa, asid, iotlb);
  837. } else {
  838. r = iommu_map(v->domain, iova, pa, size,
  839. perm_to_iommu_flags(perm),
  840. GFP_KERNEL_ACCOUNT);
  841. }
  842. if (r) {
  843. vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
  844. return r;
  845. }
  846. if (!vdpa->use_va)
  847. atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
  848. return 0;
  849. }
  850. static void vhost_vdpa_unmap(struct vhost_vdpa *v,
  851. struct vhost_iotlb *iotlb,
  852. u64 iova, u64 size)
  853. {
  854. struct vdpa_device *vdpa = v->vdpa;
  855. const struct vdpa_config_ops *ops = vdpa->config;
  856. u32 asid = iotlb_to_asid(iotlb);
  857. vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
  858. if (ops->set_map) {
  859. if (!v->in_batch)
  860. ops->set_map(vdpa, asid, iotlb);
  861. }
  862. }
  863. static int vhost_vdpa_va_map(struct vhost_vdpa *v,
  864. struct vhost_iotlb *iotlb,
  865. u64 iova, u64 size, u64 uaddr, u32 perm)
  866. {
  867. struct vhost_dev *dev = &v->vdev;
  868. u64 offset, map_size, map_iova = iova;
  869. struct vdpa_map_file *map_file;
  870. struct vm_area_struct *vma;
  871. int ret = 0;
  872. mmap_read_lock(dev->mm);
  873. while (size) {
  874. vma = find_vma(dev->mm, uaddr);
  875. if (!vma) {
  876. ret = -EINVAL;
  877. break;
  878. }
  879. map_size = min(size, vma->vm_end - uaddr);
  880. if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
  881. !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
  882. goto next;
  883. map_file = kzalloc_obj(*map_file);
  884. if (!map_file) {
  885. ret = -ENOMEM;
  886. break;
  887. }
  888. offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
  889. map_file->offset = offset;
  890. map_file->file = get_file(vma->vm_file);
  891. ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
  892. perm, map_file);
  893. if (ret) {
  894. fput(map_file->file);
  895. kfree(map_file);
  896. break;
  897. }
  898. next:
  899. size -= map_size;
  900. uaddr += map_size;
  901. map_iova += map_size;
  902. }
  903. if (ret)
  904. vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
  905. mmap_read_unlock(dev->mm);
  906. return ret;
  907. }
  908. static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
  909. struct vhost_iotlb *iotlb,
  910. u64 iova, u64 size, u64 uaddr, u32 perm)
  911. {
  912. struct vhost_dev *dev = &v->vdev;
  913. struct page **page_list;
  914. unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  915. unsigned int gup_flags = FOLL_LONGTERM;
  916. unsigned long npages, cur_base, map_pfn, last_pfn = 0;
  917. unsigned long lock_limit, sz2pin, nchunks, i;
  918. u64 start = iova;
  919. long pinned;
  920. int ret = 0;
  921. /* Limit the use of memory for bookkeeping */
  922. page_list = (struct page **) __get_free_page(GFP_KERNEL);
  923. if (!page_list)
  924. return -ENOMEM;
  925. if (perm & VHOST_ACCESS_WO)
  926. gup_flags |= FOLL_WRITE;
  927. npages = PFN_UP(size + (iova & ~PAGE_MASK));
  928. if (!npages) {
  929. ret = -EINVAL;
  930. goto free;
  931. }
  932. mmap_read_lock(dev->mm);
  933. lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
  934. if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  935. ret = -ENOMEM;
  936. goto unlock;
  937. }
  938. cur_base = uaddr & PAGE_MASK;
  939. iova &= PAGE_MASK;
  940. nchunks = 0;
  941. while (npages) {
  942. sz2pin = min_t(unsigned long, npages, list_size);
  943. pinned = pin_user_pages(cur_base, sz2pin,
  944. gup_flags, page_list);
  945. if (sz2pin != pinned) {
  946. if (pinned < 0) {
  947. ret = pinned;
  948. } else {
  949. unpin_user_pages(page_list, pinned);
  950. ret = -ENOMEM;
  951. }
  952. goto out;
  953. }
  954. nchunks++;
  955. if (!last_pfn)
  956. map_pfn = page_to_pfn(page_list[0]);
  957. for (i = 0; i < pinned; i++) {
  958. unsigned long this_pfn = page_to_pfn(page_list[i]);
  959. u64 csize;
  960. if (last_pfn && (this_pfn != last_pfn + 1)) {
  961. /* Pin a contiguous chunk of memory */
  962. csize = PFN_PHYS(last_pfn - map_pfn + 1);
  963. ret = vhost_vdpa_map(v, iotlb, iova, csize,
  964. PFN_PHYS(map_pfn),
  965. perm, NULL);
  966. if (ret) {
  967. /*
  968. * Unpin the pages that are left unmapped
  969. * from this point on in the current
  970. * page_list. The remaining outstanding
  971. * ones which may stride across several
  972. * chunks will be covered in the common
  973. * error path subsequently.
  974. */
  975. unpin_user_pages(&page_list[i],
  976. pinned - i);
  977. goto out;
  978. }
  979. map_pfn = this_pfn;
  980. iova += csize;
  981. nchunks = 0;
  982. }
  983. last_pfn = this_pfn;
  984. }
  985. cur_base += PFN_PHYS(pinned);
  986. npages -= pinned;
  987. }
  988. /* Pin the rest chunk */
  989. ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
  990. PFN_PHYS(map_pfn), perm, NULL);
  991. out:
  992. if (ret) {
  993. if (nchunks) {
  994. unsigned long pfn;
  995. /*
  996. * Unpin the outstanding pages which are yet to be
  997. * mapped but haven't due to vdpa_map() or
  998. * pin_user_pages() failure.
  999. *
  1000. * Mapped pages are accounted in vdpa_map(), hence
  1001. * the corresponding unpinning will be handled by
  1002. * vdpa_unmap().
  1003. */
  1004. WARN_ON(!last_pfn);
  1005. for (pfn = map_pfn; pfn <= last_pfn; pfn++)
  1006. unpin_user_page(pfn_to_page(pfn));
  1007. }
  1008. vhost_vdpa_unmap(v, iotlb, start, size);
  1009. }
  1010. unlock:
  1011. mmap_read_unlock(dev->mm);
  1012. free:
  1013. free_page((unsigned long)page_list);
  1014. return ret;
  1015. }
  1016. static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
  1017. struct vhost_iotlb *iotlb,
  1018. struct vhost_iotlb_msg *msg)
  1019. {
  1020. struct vdpa_device *vdpa = v->vdpa;
  1021. if (msg->iova < v->range.first || !msg->size ||
  1022. msg->iova > U64_MAX - msg->size + 1 ||
  1023. msg->iova + msg->size - 1 > v->range.last)
  1024. return -EINVAL;
  1025. if (vhost_iotlb_itree_first(iotlb, msg->iova,
  1026. msg->iova + msg->size - 1))
  1027. return -EEXIST;
  1028. if (vdpa->use_va)
  1029. return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
  1030. msg->uaddr, msg->perm);
  1031. return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
  1032. msg->perm);
  1033. }
  1034. static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
  1035. struct vhost_iotlb_msg *msg)
  1036. {
  1037. struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
  1038. struct vdpa_device *vdpa = v->vdpa;
  1039. const struct vdpa_config_ops *ops = vdpa->config;
  1040. struct vhost_iotlb *iotlb = NULL;
  1041. struct vhost_vdpa_as *as = NULL;
  1042. int r = 0;
  1043. mutex_lock(&dev->mutex);
  1044. r = vhost_dev_check_owner(dev);
  1045. if (r)
  1046. goto unlock;
  1047. if (msg->type == VHOST_IOTLB_UPDATE ||
  1048. msg->type == VHOST_IOTLB_BATCH_BEGIN) {
  1049. as = vhost_vdpa_find_alloc_as(v, asid);
  1050. if (!as) {
  1051. dev_err(&v->dev, "can't find and alloc asid %d\n",
  1052. asid);
  1053. r = -EINVAL;
  1054. goto unlock;
  1055. }
  1056. iotlb = &as->iotlb;
  1057. } else
  1058. iotlb = asid_to_iotlb(v, asid);
  1059. if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
  1060. if (v->in_batch && v->batch_asid != asid) {
  1061. dev_info(&v->dev, "batch id %d asid %d\n",
  1062. v->batch_asid, asid);
  1063. }
  1064. if (!iotlb)
  1065. dev_err(&v->dev, "no iotlb for asid %d\n", asid);
  1066. r = -EINVAL;
  1067. goto unlock;
  1068. }
  1069. switch (msg->type) {
  1070. case VHOST_IOTLB_UPDATE:
  1071. r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
  1072. break;
  1073. case VHOST_IOTLB_INVALIDATE:
  1074. vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
  1075. break;
  1076. case VHOST_IOTLB_BATCH_BEGIN:
  1077. v->batch_asid = asid;
  1078. v->in_batch = true;
  1079. break;
  1080. case VHOST_IOTLB_BATCH_END:
  1081. if (v->in_batch && ops->set_map)
  1082. ops->set_map(vdpa, asid, iotlb);
  1083. v->in_batch = false;
  1084. break;
  1085. default:
  1086. r = -EINVAL;
  1087. break;
  1088. }
  1089. unlock:
  1090. mutex_unlock(&dev->mutex);
  1091. return r;
  1092. }
  1093. static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
  1094. struct iov_iter *from)
  1095. {
  1096. struct file *file = iocb->ki_filp;
  1097. struct vhost_vdpa *v = file->private_data;
  1098. struct vhost_dev *dev = &v->vdev;
  1099. return vhost_chr_write_iter(dev, from);
  1100. }
  1101. static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
  1102. {
  1103. struct vdpa_device *vdpa = v->vdpa;
  1104. const struct vdpa_config_ops *ops = vdpa->config;
  1105. union virtio_map map = vdpa_get_map(vdpa);
  1106. struct device *dma_dev = map.dma_dev;
  1107. int ret;
  1108. /* Device want to do DMA by itself */
  1109. if (ops->set_map || ops->dma_map)
  1110. return 0;
  1111. if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
  1112. dev_warn_once(&v->dev,
  1113. "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
  1114. return -ENOTSUPP;
  1115. }
  1116. v->domain = iommu_paging_domain_alloc(dma_dev);
  1117. if (IS_ERR(v->domain)) {
  1118. ret = PTR_ERR(v->domain);
  1119. v->domain = NULL;
  1120. return ret;
  1121. }
  1122. ret = iommu_attach_device(v->domain, dma_dev);
  1123. if (ret)
  1124. goto err_attach;
  1125. return 0;
  1126. err_attach:
  1127. iommu_domain_free(v->domain);
  1128. v->domain = NULL;
  1129. return ret;
  1130. }
  1131. static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
  1132. {
  1133. struct vdpa_device *vdpa = v->vdpa;
  1134. union virtio_map map = vdpa_get_map(vdpa);
  1135. struct device *dma_dev = map.dma_dev;
  1136. if (v->domain) {
  1137. iommu_detach_device(v->domain, dma_dev);
  1138. iommu_domain_free(v->domain);
  1139. }
  1140. v->domain = NULL;
  1141. }
  1142. static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
  1143. {
  1144. struct vdpa_iova_range *range = &v->range;
  1145. struct vdpa_device *vdpa = v->vdpa;
  1146. const struct vdpa_config_ops *ops = vdpa->config;
  1147. if (ops->get_iova_range) {
  1148. *range = ops->get_iova_range(vdpa);
  1149. } else if (v->domain && v->domain->geometry.force_aperture) {
  1150. range->first = v->domain->geometry.aperture_start;
  1151. range->last = v->domain->geometry.aperture_end;
  1152. } else {
  1153. range->first = 0;
  1154. range->last = ULLONG_MAX;
  1155. }
  1156. }
  1157. static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
  1158. {
  1159. struct vhost_vdpa_as *as;
  1160. u32 asid;
  1161. for (asid = 0; asid < v->vdpa->nas; asid++) {
  1162. as = asid_to_as(v, asid);
  1163. if (as)
  1164. vhost_vdpa_remove_as(v, asid);
  1165. }
  1166. vhost_vdpa_free_domain(v);
  1167. vhost_dev_cleanup(&v->vdev);
  1168. kfree(v->vdev.vqs);
  1169. v->vdev.vqs = NULL;
  1170. }
  1171. static int vhost_vdpa_open(struct inode *inode, struct file *filep)
  1172. {
  1173. struct vhost_vdpa *v;
  1174. struct vhost_dev *dev;
  1175. struct vhost_virtqueue **vqs;
  1176. int r, opened;
  1177. u32 i, nvqs;
  1178. v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
  1179. opened = atomic_cmpxchg(&v->opened, 0, 1);
  1180. if (opened)
  1181. return -EBUSY;
  1182. nvqs = v->nvqs;
  1183. r = vhost_vdpa_reset(v);
  1184. if (r)
  1185. goto err;
  1186. vqs = kmalloc_objs(*vqs, nvqs);
  1187. if (!vqs) {
  1188. r = -ENOMEM;
  1189. goto err;
  1190. }
  1191. dev = &v->vdev;
  1192. for (i = 0; i < nvqs; i++) {
  1193. vqs[i] = &v->vqs[i];
  1194. vqs[i]->handle_kick = handle_vq_kick;
  1195. vqs[i]->call_ctx.ctx = NULL;
  1196. }
  1197. vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
  1198. vhost_vdpa_process_iotlb_msg);
  1199. r = vhost_vdpa_alloc_domain(v);
  1200. if (r)
  1201. goto err_alloc_domain;
  1202. vhost_vdpa_set_iova_range(v);
  1203. filep->private_data = v;
  1204. return 0;
  1205. err_alloc_domain:
  1206. vhost_vdpa_cleanup(v);
  1207. err:
  1208. atomic_dec(&v->opened);
  1209. return r;
  1210. }
  1211. static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
  1212. {
  1213. u32 i;
  1214. for (i = 0; i < v->nvqs; i++)
  1215. vhost_vdpa_unsetup_vq_irq(v, i);
  1216. }
  1217. static int vhost_vdpa_release(struct inode *inode, struct file *filep)
  1218. {
  1219. struct vhost_vdpa *v = filep->private_data;
  1220. struct vhost_dev *d = &v->vdev;
  1221. mutex_lock(&d->mutex);
  1222. filep->private_data = NULL;
  1223. vhost_vdpa_clean_irq(v);
  1224. vhost_vdpa_reset(v);
  1225. vhost_dev_stop(&v->vdev);
  1226. vhost_vdpa_unbind_mm(v);
  1227. vhost_vdpa_config_put(v);
  1228. vhost_vdpa_cleanup(v);
  1229. mutex_unlock(&d->mutex);
  1230. atomic_dec(&v->opened);
  1231. complete(&v->completion);
  1232. return 0;
  1233. }
  1234. #ifdef CONFIG_MMU
  1235. static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
  1236. {
  1237. struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
  1238. struct vdpa_device *vdpa = v->vdpa;
  1239. const struct vdpa_config_ops *ops = vdpa->config;
  1240. struct vdpa_notification_area notify;
  1241. struct vm_area_struct *vma = vmf->vma;
  1242. u16 index = vma->vm_pgoff;
  1243. notify = ops->get_vq_notification(vdpa, index);
  1244. return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
  1245. }
  1246. static const struct vm_operations_struct vhost_vdpa_vm_ops = {
  1247. .fault = vhost_vdpa_fault,
  1248. };
  1249. static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
  1250. {
  1251. struct vhost_vdpa *v = vma->vm_file->private_data;
  1252. struct vdpa_device *vdpa = v->vdpa;
  1253. const struct vdpa_config_ops *ops = vdpa->config;
  1254. struct vdpa_notification_area notify;
  1255. unsigned long index = vma->vm_pgoff;
  1256. if (vma->vm_end - vma->vm_start != PAGE_SIZE)
  1257. return -EINVAL;
  1258. if ((vma->vm_flags & VM_SHARED) == 0)
  1259. return -EINVAL;
  1260. if (vma->vm_flags & VM_READ)
  1261. return -EINVAL;
  1262. if (index > 65535)
  1263. return -EINVAL;
  1264. if (!ops->get_vq_notification)
  1265. return -ENOTSUPP;
  1266. /* To be safe and easily modelled by userspace, We only
  1267. * support the doorbell which sits on the page boundary and
  1268. * does not share the page with other registers.
  1269. */
  1270. notify = ops->get_vq_notification(vdpa, index);
  1271. if (notify.addr & (PAGE_SIZE - 1))
  1272. return -EINVAL;
  1273. if (vma->vm_end - vma->vm_start != notify.size)
  1274. return -ENOTSUPP;
  1275. vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  1276. vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
  1277. vma->vm_ops = &vhost_vdpa_vm_ops;
  1278. return 0;
  1279. }
  1280. #endif /* CONFIG_MMU */
  1281. static const struct file_operations vhost_vdpa_fops = {
  1282. .owner = THIS_MODULE,
  1283. .open = vhost_vdpa_open,
  1284. .release = vhost_vdpa_release,
  1285. .write_iter = vhost_vdpa_chr_write_iter,
  1286. .unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
  1287. #ifdef CONFIG_MMU
  1288. .mmap = vhost_vdpa_mmap,
  1289. #endif /* CONFIG_MMU */
  1290. .compat_ioctl = compat_ptr_ioctl,
  1291. };
  1292. static void vhost_vdpa_release_dev(struct device *device)
  1293. {
  1294. struct vhost_vdpa *v =
  1295. container_of(device, struct vhost_vdpa, dev);
  1296. ida_free(&vhost_vdpa_ida, v->minor);
  1297. kfree(v->vqs);
  1298. kfree(v);
  1299. }
  1300. static int vhost_vdpa_probe(struct vdpa_device *vdpa)
  1301. {
  1302. const struct vdpa_config_ops *ops = vdpa->config;
  1303. struct vhost_vdpa *v;
  1304. int minor;
  1305. int i, r;
  1306. /* We can't support platform IOMMU device with more than 1
  1307. * group or as
  1308. */
  1309. if (!ops->set_map && !ops->dma_map &&
  1310. (vdpa->ngroups > 1 || vdpa->nas > 1))
  1311. return -EOPNOTSUPP;
  1312. v = kzalloc_obj(*v, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
  1313. if (!v)
  1314. return -ENOMEM;
  1315. minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1,
  1316. GFP_KERNEL);
  1317. if (minor < 0) {
  1318. kfree(v);
  1319. return minor;
  1320. }
  1321. atomic_set(&v->opened, 0);
  1322. v->minor = minor;
  1323. v->vdpa = vdpa;
  1324. v->nvqs = vdpa->nvqs;
  1325. v->virtio_id = ops->get_device_id(vdpa);
  1326. device_initialize(&v->dev);
  1327. v->dev.release = vhost_vdpa_release_dev;
  1328. v->dev.parent = &vdpa->dev;
  1329. v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
  1330. v->vqs = kmalloc_objs(struct vhost_virtqueue, v->nvqs);
  1331. if (!v->vqs) {
  1332. r = -ENOMEM;
  1333. goto err;
  1334. }
  1335. r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
  1336. if (r)
  1337. goto err;
  1338. cdev_init(&v->cdev, &vhost_vdpa_fops);
  1339. v->cdev.owner = THIS_MODULE;
  1340. r = cdev_device_add(&v->cdev, &v->dev);
  1341. if (r)
  1342. goto err;
  1343. init_completion(&v->completion);
  1344. vdpa_set_drvdata(vdpa, v);
  1345. for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
  1346. INIT_HLIST_HEAD(&v->as[i]);
  1347. return 0;
  1348. err:
  1349. put_device(&v->dev);
  1350. return r;
  1351. }
  1352. static void vhost_vdpa_remove(struct vdpa_device *vdpa)
  1353. {
  1354. struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
  1355. int opened;
  1356. cdev_device_del(&v->cdev, &v->dev);
  1357. do {
  1358. opened = atomic_cmpxchg(&v->opened, 0, 1);
  1359. if (!opened)
  1360. break;
  1361. wait_for_completion(&v->completion);
  1362. } while (1);
  1363. put_device(&v->dev);
  1364. }
  1365. static struct vdpa_driver vhost_vdpa_driver = {
  1366. .driver = {
  1367. .name = "vhost_vdpa",
  1368. },
  1369. .probe = vhost_vdpa_probe,
  1370. .remove = vhost_vdpa_remove,
  1371. };
  1372. static int __init vhost_vdpa_init(void)
  1373. {
  1374. int r;
  1375. r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
  1376. "vhost-vdpa");
  1377. if (r)
  1378. goto err_alloc_chrdev;
  1379. r = vdpa_register_driver(&vhost_vdpa_driver);
  1380. if (r)
  1381. goto err_vdpa_register_driver;
  1382. return 0;
  1383. err_vdpa_register_driver:
  1384. unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
  1385. err_alloc_chrdev:
  1386. return r;
  1387. }
  1388. module_init(vhost_vdpa_init);
  1389. static void __exit vhost_vdpa_exit(void)
  1390. {
  1391. vdpa_unregister_driver(&vhost_vdpa_driver);
  1392. unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
  1393. }
  1394. module_exit(vhost_vdpa_exit);
  1395. MODULE_VERSION("0.0.1");
  1396. MODULE_LICENSE("GPL v2");
  1397. MODULE_AUTHOR("Intel Corporation");
  1398. MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");