vringh.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Helpers for the host side of a virtio ring.
  4. *
  5. * Since these may be in userspace, we use (inline) accessors.
  6. */
  7. #include <linux/compiler.h>
  8. #include <linux/module.h>
  9. #include <linux/vringh.h>
  10. #include <linux/virtio_ring.h>
  11. #include <linux/kernel.h>
  12. #include <linux/ratelimit.h>
  13. #include <linux/uaccess.h>
  14. #include <linux/slab.h>
  15. #include <linux/export.h>
  16. #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
  17. #include <linux/bvec.h>
  18. #include <linux/highmem.h>
  19. #include <linux/vhost_iotlb.h>
  20. #endif
  21. #include <uapi/linux/virtio_config.h>
  22. static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
  23. {
  24. static DEFINE_RATELIMIT_STATE(vringh_rs,
  25. DEFAULT_RATELIMIT_INTERVAL,
  26. DEFAULT_RATELIMIT_BURST);
  27. if (__ratelimit(&vringh_rs)) {
  28. va_list ap;
  29. va_start(ap, fmt);
  30. printk(KERN_NOTICE "vringh:");
  31. vprintk(fmt, ap);
  32. va_end(ap);
  33. }
  34. }
  35. /* Returns vring->num if empty, -ve on error. */
  36. static inline int __vringh_get_head(const struct vringh *vrh,
  37. int (*getu16)(const struct vringh *vrh,
  38. u16 *val, const __virtio16 *p),
  39. u16 *last_avail_idx)
  40. {
  41. u16 avail_idx, i, head;
  42. int err;
  43. err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
  44. if (err) {
  45. vringh_bad("Failed to access avail idx at %p",
  46. &vrh->vring.avail->idx);
  47. return err;
  48. }
  49. if (*last_avail_idx == avail_idx)
  50. return vrh->vring.num;
  51. /* Only get avail ring entries after they have been exposed by guest. */
  52. virtio_rmb(vrh->weak_barriers);
  53. i = *last_avail_idx & (vrh->vring.num - 1);
  54. err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
  55. if (err) {
  56. vringh_bad("Failed to read head: idx %d address %p",
  57. *last_avail_idx, &vrh->vring.avail->ring[i]);
  58. return err;
  59. }
  60. if (head >= vrh->vring.num) {
  61. vringh_bad("Guest says index %u > %u is available",
  62. head, vrh->vring.num);
  63. return -EINVAL;
  64. }
  65. (*last_avail_idx)++;
  66. return head;
  67. }
  68. /**
  69. * vringh_kiov_advance - skip bytes from vring_kiov
  70. * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
  71. * @len: the maximum length to advance
  72. */
  73. void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
  74. {
  75. while (len && iov->i < iov->used) {
  76. size_t partlen = min(iov->iov[iov->i].iov_len, len);
  77. iov->consumed += partlen;
  78. iov->iov[iov->i].iov_len -= partlen;
  79. iov->iov[iov->i].iov_base += partlen;
  80. if (!iov->iov[iov->i].iov_len) {
  81. /* Fix up old iov element then increment. */
  82. iov->iov[iov->i].iov_len = iov->consumed;
  83. iov->iov[iov->i].iov_base -= iov->consumed;
  84. iov->consumed = 0;
  85. iov->i++;
  86. }
  87. len -= partlen;
  88. }
  89. }
  90. EXPORT_SYMBOL(vringh_kiov_advance);
  91. /* Copy some bytes to/from the iovec. Returns num copied. */
  92. static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
  93. struct vringh_kiov *iov,
  94. void *ptr, size_t len,
  95. int (*xfer)(const struct vringh *vrh,
  96. void *addr, void *ptr,
  97. size_t len))
  98. {
  99. int err, done = 0;
  100. while (len && iov->i < iov->used) {
  101. size_t partlen;
  102. partlen = min(iov->iov[iov->i].iov_len, len);
  103. err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
  104. if (err)
  105. return err;
  106. done += partlen;
  107. len -= partlen;
  108. ptr += partlen;
  109. iov->consumed += partlen;
  110. iov->iov[iov->i].iov_len -= partlen;
  111. iov->iov[iov->i].iov_base += partlen;
  112. if (!iov->iov[iov->i].iov_len) {
  113. /* Fix up old iov element then increment. */
  114. iov->iov[iov->i].iov_len = iov->consumed;
  115. iov->iov[iov->i].iov_base -= iov->consumed;
  116. iov->consumed = 0;
  117. iov->i++;
  118. }
  119. }
  120. return done;
  121. }
  122. /* May reduce *len if range is shorter. */
  123. static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
  124. struct vringh_range *range,
  125. bool (*getrange)(struct vringh *,
  126. u64, struct vringh_range *))
  127. {
  128. if (addr < range->start || addr > range->end_incl) {
  129. if (!getrange(vrh, addr, range))
  130. return false;
  131. }
  132. BUG_ON(addr < range->start || addr > range->end_incl);
  133. /* To end of memory? */
  134. if (unlikely(addr + *len == 0)) {
  135. if (range->end_incl == -1ULL)
  136. return true;
  137. goto truncate;
  138. }
  139. /* Otherwise, don't wrap. */
  140. if (addr + *len < addr) {
  141. vringh_bad("Wrapping descriptor %zu@0x%llx",
  142. *len, (unsigned long long)addr);
  143. return false;
  144. }
  145. if (unlikely(addr + *len - 1 > range->end_incl))
  146. goto truncate;
  147. return true;
  148. truncate:
  149. *len = range->end_incl + 1 - addr;
  150. return true;
  151. }
  152. static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
  153. struct vringh_range *range,
  154. bool (*getrange)(struct vringh *,
  155. u64, struct vringh_range *))
  156. {
  157. return true;
  158. }
  159. /* No reason for this code to be inline. */
  160. static int move_to_indirect(const struct vringh *vrh,
  161. int *up_next, u16 *i, void *addr,
  162. const struct vring_desc *desc,
  163. struct vring_desc **descs, int *desc_max)
  164. {
  165. u32 len;
  166. /* Indirect tables can't have indirect. */
  167. if (*up_next != -1) {
  168. vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
  169. return -EINVAL;
  170. }
  171. len = vringh32_to_cpu(vrh, desc->len);
  172. if (unlikely(len % sizeof(struct vring_desc))) {
  173. vringh_bad("Strange indirect len %u", desc->len);
  174. return -EINVAL;
  175. }
  176. /* We will check this when we follow it! */
  177. if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
  178. *up_next = vringh16_to_cpu(vrh, desc->next);
  179. else
  180. *up_next = -2;
  181. *descs = addr;
  182. *desc_max = len / sizeof(struct vring_desc);
  183. /* Now, start at the first indirect. */
  184. *i = 0;
  185. return 0;
  186. }
  187. static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
  188. {
  189. struct kvec *new;
  190. unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
  191. if (new_num < 8)
  192. new_num = 8;
  193. flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
  194. if (flag)
  195. new = krealloc_array(iov->iov, new_num, sizeof(*new), gfp);
  196. else {
  197. new = kmalloc_objs(*new, new_num, gfp);
  198. if (new) {
  199. memcpy(new, iov->iov,
  200. iov->max_num * sizeof(struct iovec));
  201. flag = VRINGH_IOV_ALLOCATED;
  202. }
  203. }
  204. if (!new)
  205. return -ENOMEM;
  206. iov->iov = new;
  207. iov->max_num = (new_num | flag);
  208. return 0;
  209. }
  210. static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
  211. struct vring_desc **descs, int *desc_max)
  212. {
  213. u16 i = *up_next;
  214. *up_next = -1;
  215. *descs = vrh->vring.desc;
  216. *desc_max = vrh->vring.num;
  217. return i;
  218. }
  219. static int slow_copy(struct vringh *vrh, void *dst, const void *src,
  220. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  221. struct vringh_range *range,
  222. bool (*getrange)(struct vringh *vrh,
  223. u64,
  224. struct vringh_range *)),
  225. bool (*getrange)(struct vringh *vrh,
  226. u64 addr,
  227. struct vringh_range *r),
  228. struct vringh_range *range,
  229. int (*copy)(const struct vringh *vrh,
  230. void *dst, const void *src, size_t len))
  231. {
  232. size_t part, len = sizeof(struct vring_desc);
  233. do {
  234. u64 addr;
  235. int err;
  236. part = len;
  237. addr = (u64)(unsigned long)src - range->offset;
  238. if (!rcheck(vrh, addr, &part, range, getrange))
  239. return -EINVAL;
  240. err = copy(vrh, dst, src, part);
  241. if (err)
  242. return err;
  243. dst += part;
  244. src += part;
  245. len -= part;
  246. } while (len);
  247. return 0;
  248. }
  249. static inline int
  250. __vringh_iov(struct vringh *vrh, u16 i,
  251. struct vringh_kiov *riov,
  252. struct vringh_kiov *wiov,
  253. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  254. struct vringh_range *range,
  255. bool (*getrange)(struct vringh *, u64,
  256. struct vringh_range *)),
  257. bool (*getrange)(struct vringh *, u64, struct vringh_range *),
  258. gfp_t gfp,
  259. int (*copy)(const struct vringh *vrh,
  260. void *dst, const void *src, size_t len))
  261. {
  262. int err, count = 0, indirect_count = 0, up_next, desc_max;
  263. struct vring_desc desc, *descs;
  264. struct vringh_range range = { -1ULL, 0 }, slowrange;
  265. bool slow = false;
  266. /* We start traversing vring's descriptor table. */
  267. descs = vrh->vring.desc;
  268. desc_max = vrh->vring.num;
  269. up_next = -1;
  270. /* You must want something! */
  271. if (WARN_ON(!riov && !wiov))
  272. return -EINVAL;
  273. if (riov)
  274. riov->i = riov->used = riov->consumed = 0;
  275. if (wiov)
  276. wiov->i = wiov->used = wiov->consumed = 0;
  277. for (;;) {
  278. void *addr;
  279. struct vringh_kiov *iov;
  280. size_t len;
  281. if (unlikely(slow))
  282. err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
  283. &slowrange, copy);
  284. else
  285. err = copy(vrh, &desc, &descs[i], sizeof(desc));
  286. if (unlikely(err))
  287. goto fail;
  288. if (unlikely(desc.flags &
  289. cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
  290. u64 a = vringh64_to_cpu(vrh, desc.addr);
  291. /* Make sure it's OK, and get offset. */
  292. len = vringh32_to_cpu(vrh, desc.len);
  293. if (!rcheck(vrh, a, &len, &range, getrange)) {
  294. err = -EINVAL;
  295. goto fail;
  296. }
  297. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  298. slow = true;
  299. /* We need to save this range to use offset */
  300. slowrange = range;
  301. }
  302. addr = (void *)(long)(a + range.offset);
  303. err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
  304. &descs, &desc_max);
  305. if (err)
  306. goto fail;
  307. continue;
  308. }
  309. if (up_next == -1)
  310. count++;
  311. else
  312. indirect_count++;
  313. if (count > vrh->vring.num || indirect_count > desc_max) {
  314. vringh_bad("Descriptor loop in %p", descs);
  315. err = -ELOOP;
  316. goto fail;
  317. }
  318. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
  319. iov = wiov;
  320. else {
  321. iov = riov;
  322. if (unlikely(wiov && wiov->used)) {
  323. vringh_bad("Readable desc %p after writable",
  324. &descs[i]);
  325. err = -EINVAL;
  326. goto fail;
  327. }
  328. }
  329. if (!iov) {
  330. vringh_bad("Unexpected %s desc",
  331. !wiov ? "writable" : "readable");
  332. err = -EPROTO;
  333. goto fail;
  334. }
  335. again:
  336. /* Make sure it's OK, and get offset. */
  337. len = vringh32_to_cpu(vrh, desc.len);
  338. if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
  339. getrange)) {
  340. err = -EINVAL;
  341. goto fail;
  342. }
  343. addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
  344. range.offset);
  345. if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
  346. err = resize_iovec(iov, gfp);
  347. if (err)
  348. goto fail;
  349. }
  350. iov->iov[iov->used].iov_base = addr;
  351. iov->iov[iov->used].iov_len = len;
  352. iov->used++;
  353. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  354. desc.len = cpu_to_vringh32(vrh,
  355. vringh32_to_cpu(vrh, desc.len) - len);
  356. desc.addr = cpu_to_vringh64(vrh,
  357. vringh64_to_cpu(vrh, desc.addr) + len);
  358. goto again;
  359. }
  360. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
  361. i = vringh16_to_cpu(vrh, desc.next);
  362. } else {
  363. /* Just in case we need to finish traversing above. */
  364. if (unlikely(up_next > 0)) {
  365. i = return_from_indirect(vrh, &up_next,
  366. &descs, &desc_max);
  367. slow = false;
  368. indirect_count = 0;
  369. } else
  370. break;
  371. }
  372. if (i >= desc_max) {
  373. vringh_bad("Chained index %u > %u", i, desc_max);
  374. err = -EINVAL;
  375. goto fail;
  376. }
  377. }
  378. return 0;
  379. fail:
  380. return err;
  381. }
  382. static inline int __vringh_complete(struct vringh *vrh,
  383. const struct vring_used_elem *used,
  384. unsigned int num_used,
  385. int (*putu16)(const struct vringh *vrh,
  386. __virtio16 *p, u16 val),
  387. int (*putused)(const struct vringh *vrh,
  388. struct vring_used_elem *dst,
  389. const struct vring_used_elem
  390. *src, unsigned num))
  391. {
  392. struct vring_used *used_ring;
  393. int err;
  394. u16 used_idx, off;
  395. used_ring = vrh->vring.used;
  396. used_idx = vrh->last_used_idx + vrh->completed;
  397. off = used_idx % vrh->vring.num;
  398. /* Compiler knows num_used == 1 sometimes, hence extra check */
  399. if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
  400. u16 part = vrh->vring.num - off;
  401. err = putused(vrh, &used_ring->ring[off], used, part);
  402. if (!err)
  403. err = putused(vrh, &used_ring->ring[0], used + part,
  404. num_used - part);
  405. } else
  406. err = putused(vrh, &used_ring->ring[off], used, num_used);
  407. if (err) {
  408. vringh_bad("Failed to write %u used entries %u at %p",
  409. num_used, off, &used_ring->ring[off]);
  410. return err;
  411. }
  412. /* Make sure buffer is written before we update index. */
  413. virtio_wmb(vrh->weak_barriers);
  414. err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
  415. if (err) {
  416. vringh_bad("Failed to update used index at %p",
  417. &vrh->vring.used->idx);
  418. return err;
  419. }
  420. vrh->completed += num_used;
  421. return 0;
  422. }
  423. static inline int __vringh_need_notify(struct vringh *vrh,
  424. int (*getu16)(const struct vringh *vrh,
  425. u16 *val,
  426. const __virtio16 *p))
  427. {
  428. bool notify;
  429. u16 used_event;
  430. int err;
  431. /* Flush out used index update. This is paired with the
  432. * barrier that the Guest executes when enabling
  433. * interrupts. */
  434. virtio_mb(vrh->weak_barriers);
  435. /* Old-style, without event indices. */
  436. if (!vrh->event_indices) {
  437. u16 flags;
  438. err = getu16(vrh, &flags, &vrh->vring.avail->flags);
  439. if (err) {
  440. vringh_bad("Failed to get flags at %p",
  441. &vrh->vring.avail->flags);
  442. return err;
  443. }
  444. return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
  445. }
  446. /* Modern: we know when other side wants to know. */
  447. err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
  448. if (err) {
  449. vringh_bad("Failed to get used event idx at %p",
  450. &vring_used_event(&vrh->vring));
  451. return err;
  452. }
  453. /* Just in case we added so many that we wrap. */
  454. if (unlikely(vrh->completed > 0xffff))
  455. notify = true;
  456. else
  457. notify = vring_need_event(used_event,
  458. vrh->last_used_idx + vrh->completed,
  459. vrh->last_used_idx);
  460. vrh->last_used_idx += vrh->completed;
  461. vrh->completed = 0;
  462. return notify;
  463. }
  464. static inline bool __vringh_notify_enable(struct vringh *vrh,
  465. int (*getu16)(const struct vringh *vrh,
  466. u16 *val, const __virtio16 *p),
  467. int (*putu16)(const struct vringh *vrh,
  468. __virtio16 *p, u16 val))
  469. {
  470. u16 avail;
  471. if (!vrh->event_indices) {
  472. /* Old-school; update flags. */
  473. if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
  474. vringh_bad("Clearing used flags %p",
  475. &vrh->vring.used->flags);
  476. return true;
  477. }
  478. } else {
  479. if (putu16(vrh, &vring_avail_event(&vrh->vring),
  480. vrh->last_avail_idx) != 0) {
  481. vringh_bad("Updating avail event index %p",
  482. &vring_avail_event(&vrh->vring));
  483. return true;
  484. }
  485. }
  486. /* They could have slipped one in as we were doing that: make
  487. * sure it's written, then check again. */
  488. virtio_mb(vrh->weak_barriers);
  489. if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
  490. vringh_bad("Failed to check avail idx at %p",
  491. &vrh->vring.avail->idx);
  492. return true;
  493. }
  494. /* This is unlikely, so we just leave notifications enabled
  495. * (if we're using event_indices, we'll only get one
  496. * notification anyway). */
  497. return avail == vrh->last_avail_idx;
  498. }
  499. static inline void __vringh_notify_disable(struct vringh *vrh,
  500. int (*putu16)(const struct vringh *vrh,
  501. __virtio16 *p, u16 val))
  502. {
  503. if (!vrh->event_indices) {
  504. /* Old-school; update flags. */
  505. if (putu16(vrh, &vrh->vring.used->flags,
  506. VRING_USED_F_NO_NOTIFY)) {
  507. vringh_bad("Setting used flags %p",
  508. &vrh->vring.used->flags);
  509. }
  510. }
  511. }
  512. /* Userspace access helpers: in this case, addresses are really userspace. */
  513. static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
  514. {
  515. __virtio16 v = 0;
  516. int rc = get_user(v, (__force __virtio16 __user *)p);
  517. *val = vringh16_to_cpu(vrh, v);
  518. return rc;
  519. }
  520. static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
  521. {
  522. __virtio16 v = cpu_to_vringh16(vrh, val);
  523. return put_user(v, (__force __virtio16 __user *)p);
  524. }
  525. static inline int copydesc_user(const struct vringh *vrh,
  526. void *dst, const void *src, size_t len)
  527. {
  528. return copy_from_user(dst, (__force void __user *)src, len) ?
  529. -EFAULT : 0;
  530. }
  531. static inline int putused_user(const struct vringh *vrh,
  532. struct vring_used_elem *dst,
  533. const struct vring_used_elem *src,
  534. unsigned int num)
  535. {
  536. return copy_to_user((__force void __user *)dst, src,
  537. sizeof(*dst) * num) ? -EFAULT : 0;
  538. }
  539. static inline int xfer_from_user(const struct vringh *vrh, void *src,
  540. void *dst, size_t len)
  541. {
  542. return copy_from_user(dst, (__force void __user *)src, len) ?
  543. -EFAULT : 0;
  544. }
  545. static inline int xfer_to_user(const struct vringh *vrh,
  546. void *dst, void *src, size_t len)
  547. {
  548. return copy_to_user((__force void __user *)dst, src, len) ?
  549. -EFAULT : 0;
  550. }
  551. /**
  552. * vringh_init_user - initialize a vringh for a userspace vring.
  553. * @vrh: the vringh to initialize.
  554. * @features: the feature bits for this ring.
  555. * @num: the number of elements.
  556. * @weak_barriers: true if we only need memory barriers, not I/O.
  557. * @desc: the userspace descriptor pointer.
  558. * @avail: the userspace avail pointer.
  559. * @used: the userspace used pointer.
  560. *
  561. * Returns an error if num is invalid: you should check pointers
  562. * yourself!
  563. */
  564. int vringh_init_user(struct vringh *vrh, u64 features,
  565. unsigned int num, bool weak_barriers,
  566. vring_desc_t __user *desc,
  567. vring_avail_t __user *avail,
  568. vring_used_t __user *used)
  569. {
  570. /* Sane power of 2 please! */
  571. if (!num || num > 0xffff || (num & (num - 1))) {
  572. vringh_bad("Bad ring size %u", num);
  573. return -EINVAL;
  574. }
  575. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  576. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  577. vrh->weak_barriers = weak_barriers;
  578. vrh->completed = 0;
  579. vrh->last_avail_idx = 0;
  580. vrh->last_used_idx = 0;
  581. vrh->vring.num = num;
  582. /* vring expects kernel addresses, but only used via accessors. */
  583. vrh->vring.desc = (__force struct vring_desc *)desc;
  584. vrh->vring.avail = (__force struct vring_avail *)avail;
  585. vrh->vring.used = (__force struct vring_used *)used;
  586. return 0;
  587. }
  588. EXPORT_SYMBOL(vringh_init_user);
  589. /**
  590. * vringh_getdesc_user - get next available descriptor from userspace ring.
  591. * @vrh: the userspace vring.
  592. * @riov: where to put the readable descriptors (or NULL)
  593. * @wiov: where to put the writable descriptors (or NULL)
  594. * @getrange: function to call to check ranges.
  595. * @head: head index we received, for passing to vringh_complete_user().
  596. *
  597. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  598. *
  599. * Note that on error return, you can tell the difference between an
  600. * invalid ring and a single invalid descriptor: in the former case,
  601. * *head will be vrh->vring.num. You may be able to ignore an invalid
  602. * descriptor, but there's not much you can do with an invalid ring.
  603. *
  604. * Note that you can reuse riov and wiov with subsequent calls. Content is
  605. * overwritten and memory reallocated if more space is needed.
  606. * When you don't have to use riov and wiov anymore, you should clean up them
  607. * calling vringh_iov_cleanup() to release the memory, even on error!
  608. */
  609. int vringh_getdesc_user(struct vringh *vrh,
  610. struct vringh_iov *riov,
  611. struct vringh_iov *wiov,
  612. bool (*getrange)(struct vringh *vrh,
  613. u64 addr, struct vringh_range *r),
  614. u16 *head)
  615. {
  616. int err;
  617. *head = vrh->vring.num;
  618. err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
  619. if (err < 0)
  620. return err;
  621. /* Empty... */
  622. if (err == vrh->vring.num)
  623. return 0;
  624. /* We need the layouts to be the identical for this to work */
  625. BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
  626. BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
  627. offsetof(struct vringh_iov, iov));
  628. BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
  629. offsetof(struct vringh_iov, i));
  630. BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
  631. offsetof(struct vringh_iov, used));
  632. BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
  633. offsetof(struct vringh_iov, max_num));
  634. BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
  635. BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
  636. offsetof(struct kvec, iov_base));
  637. BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
  638. offsetof(struct kvec, iov_len));
  639. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
  640. != sizeof(((struct kvec *)NULL)->iov_base));
  641. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
  642. != sizeof(((struct kvec *)NULL)->iov_len));
  643. *head = err;
  644. err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
  645. (struct vringh_kiov *)wiov,
  646. range_check, getrange, GFP_KERNEL, copydesc_user);
  647. if (err)
  648. return err;
  649. return 1;
  650. }
  651. EXPORT_SYMBOL(vringh_getdesc_user);
  652. /**
  653. * vringh_iov_pull_user - copy bytes from vring_iov.
  654. * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
  655. * @dst: the place to copy.
  656. * @len: the maximum length to copy.
  657. *
  658. * Returns the bytes copied <= len or a negative errno.
  659. */
  660. ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
  661. {
  662. return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
  663. dst, len, xfer_from_user);
  664. }
  665. EXPORT_SYMBOL(vringh_iov_pull_user);
  666. /**
  667. * vringh_iov_push_user - copy bytes into vring_iov.
  668. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
  669. * @src: the place to copy from.
  670. * @len: the maximum length to copy.
  671. *
  672. * Returns the bytes copied <= len or a negative errno.
  673. */
  674. ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
  675. const void *src, size_t len)
  676. {
  677. return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
  678. (void *)src, len, xfer_to_user);
  679. }
  680. EXPORT_SYMBOL(vringh_iov_push_user);
  681. /**
  682. * vringh_complete_user - we've finished with descriptor, publish it.
  683. * @vrh: the vring.
  684. * @head: the head as filled in by vringh_getdesc_user.
  685. * @len: the length of data we have written.
  686. *
  687. * You should check vringh_need_notify_user() after one or more calls
  688. * to this function.
  689. */
  690. int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
  691. {
  692. struct vring_used_elem used;
  693. used.id = cpu_to_vringh32(vrh, head);
  694. used.len = cpu_to_vringh32(vrh, len);
  695. return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
  696. }
  697. EXPORT_SYMBOL(vringh_complete_user);
  698. /**
  699. * vringh_complete_multi_user - we've finished with many descriptors.
  700. * @vrh: the vring.
  701. * @used: the head, length pairs.
  702. * @num_used: the number of used elements.
  703. *
  704. * You should check vringh_need_notify_user() after one or more calls
  705. * to this function.
  706. */
  707. int vringh_complete_multi_user(struct vringh *vrh,
  708. const struct vring_used_elem used[],
  709. unsigned num_used)
  710. {
  711. return __vringh_complete(vrh, used, num_used,
  712. putu16_user, putused_user);
  713. }
  714. EXPORT_SYMBOL(vringh_complete_multi_user);
  715. /**
  716. * vringh_notify_enable_user - we want to know if something changes.
  717. * @vrh: the vring.
  718. *
  719. * This always enables notifications, but returns false if there are
  720. * now more buffers available in the vring.
  721. */
  722. bool vringh_notify_enable_user(struct vringh *vrh)
  723. {
  724. return __vringh_notify_enable(vrh, getu16_user, putu16_user);
  725. }
  726. EXPORT_SYMBOL(vringh_notify_enable_user);
  727. /**
  728. * vringh_notify_disable_user - don't tell us if something changes.
  729. * @vrh: the vring.
  730. *
  731. * This is our normal running state: we disable and then only enable when
  732. * we're going to sleep.
  733. */
  734. void vringh_notify_disable_user(struct vringh *vrh)
  735. {
  736. __vringh_notify_disable(vrh, putu16_user);
  737. }
  738. EXPORT_SYMBOL(vringh_notify_disable_user);
  739. /**
  740. * vringh_need_notify_user - must we tell the other side about used buffers?
  741. * @vrh: the vring we've called vringh_complete_user() on.
  742. *
  743. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  744. */
  745. int vringh_need_notify_user(struct vringh *vrh)
  746. {
  747. return __vringh_need_notify(vrh, getu16_user);
  748. }
  749. EXPORT_SYMBOL(vringh_need_notify_user);
  750. /* Kernelspace access helpers. */
  751. static inline int getu16_kern(const struct vringh *vrh,
  752. u16 *val, const __virtio16 *p)
  753. {
  754. *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
  755. return 0;
  756. }
  757. static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
  758. {
  759. WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
  760. return 0;
  761. }
  /*
   * copydesc_kern - copy @len bytes from @src to @dst with a plain memcpy().
   * @vrh: unused here; present so the signature matches the other
   *       copydesc_* helpers handed to __vringh_iov().
   * @dst: destination buffer.
   * @src: source buffer.
   * @len: number of bytes to copy.
   *
   * Always returns 0.
   */
  static inline int copydesc_kern(const struct vringh *vrh,
                                  void *dst, const void *src, size_t len)
  {
          memcpy(dst, src, len);

          return 0;
  }
  768. static inline int putused_kern(const struct vringh *vrh,
  769. struct vring_used_elem *dst,
  770. const struct vring_used_elem *src,
  771. unsigned int num)
  772. {
  773. memcpy(dst, src, num * sizeof(*dst));
  774. return 0;
  775. }
  776. /**
  777. * vringh_init_kern - initialize a vringh for a kernelspace vring.
  778. * @vrh: the vringh to initialize.
  779. * @features: the feature bits for this ring.
  780. * @num: the number of elements.
  781. * @weak_barriers: true if we only need memory barriers, not I/O.
  782. * @desc: the userspace descriptor pointer.
  783. * @avail: the userspace avail pointer.
  784. * @used: the userspace used pointer.
  785. *
  786. * Returns an error if num is invalid.
  787. */
  788. int vringh_init_kern(struct vringh *vrh, u64 features,
  789. unsigned int num, bool weak_barriers,
  790. struct vring_desc *desc,
  791. struct vring_avail *avail,
  792. struct vring_used *used)
  793. {
  794. /* Sane power of 2 please! */
  795. if (!num || num > 0xffff || (num & (num - 1))) {
  796. vringh_bad("Bad ring size %u", num);
  797. return -EINVAL;
  798. }
  799. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  800. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  801. vrh->weak_barriers = weak_barriers;
  802. vrh->completed = 0;
  803. vrh->last_avail_idx = 0;
  804. vrh->last_used_idx = 0;
  805. vrh->vring.num = num;
  806. vrh->vring.desc = desc;
  807. vrh->vring.avail = avail;
  808. vrh->vring.used = used;
  809. return 0;
  810. }
  811. EXPORT_SYMBOL(vringh_init_kern);
  812. /**
  813. * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
  814. * @vrh: the kernelspace vring.
  815. * @riov: where to put the readable descriptors (or NULL)
  816. * @wiov: where to put the writable descriptors (or NULL)
  817. * @head: head index we received, for passing to vringh_complete_kern().
  818. * @gfp: flags for allocating larger riov/wiov.
  819. *
  820. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  821. *
  822. * Note that on error return, you can tell the difference between an
  823. * invalid ring and a single invalid descriptor: in the former case,
  824. * *head will be vrh->vring.num. You may be able to ignore an invalid
  825. * descriptor, but there's not much you can do with an invalid ring.
  826. *
  827. * Note that you can reuse riov and wiov with subsequent calls. Content is
  828. * overwritten and memory reallocated if more space is needed.
  829. * When you don't have to use riov and wiov anymore, you should clean up them
  830. * calling vringh_kiov_cleanup() to release the memory, even on error!
  831. */
  832. int vringh_getdesc_kern(struct vringh *vrh,
  833. struct vringh_kiov *riov,
  834. struct vringh_kiov *wiov,
  835. u16 *head,
  836. gfp_t gfp)
  837. {
  838. int err;
  839. err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
  840. if (err < 0)
  841. return err;
  842. /* Empty... */
  843. if (err == vrh->vring.num)
  844. return 0;
  845. *head = err;
  846. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  847. gfp, copydesc_kern);
  848. if (err)
  849. return err;
  850. return 1;
  851. }
  852. EXPORT_SYMBOL(vringh_getdesc_kern);
  853. /**
  854. * vringh_complete_kern - we've finished with descriptor, publish it.
  855. * @vrh: the vring.
  856. * @head: the head as filled in by vringh_getdesc_kern.
  857. * @len: the length of data we have written.
  858. *
  859. * You should check vringh_need_notify_kern() after one or more calls
  860. * to this function.
  861. */
  862. int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
  863. {
  864. struct vring_used_elem used;
  865. used.id = cpu_to_vringh32(vrh, head);
  866. used.len = cpu_to_vringh32(vrh, len);
  867. return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
  868. }
  869. EXPORT_SYMBOL(vringh_complete_kern);
  870. /**
  871. * vringh_notify_enable_kern - we want to know if something changes.
  872. * @vrh: the vring.
  873. *
  874. * This always enables notifications, but returns false if there are
  875. * now more buffers available in the vring.
  876. */
  877. bool vringh_notify_enable_kern(struct vringh *vrh)
  878. {
  879. return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
  880. }
  881. EXPORT_SYMBOL(vringh_notify_enable_kern);
  882. /**
  883. * vringh_notify_disable_kern - don't tell us if something changes.
  884. * @vrh: the vring.
  885. *
  886. * This is our normal running state: we disable and then only enable when
  887. * we're going to sleep.
  888. */
  889. void vringh_notify_disable_kern(struct vringh *vrh)
  890. {
  891. __vringh_notify_disable(vrh, putu16_kern);
  892. }
  893. EXPORT_SYMBOL(vringh_notify_disable_kern);
  894. /**
  895. * vringh_need_notify_kern - must we tell the other side about used buffers?
  896. * @vrh: the vring we've called vringh_complete_kern() on.
  897. *
  898. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  899. */
  900. int vringh_need_notify_kern(struct vringh *vrh)
  901. {
  902. return __vringh_need_notify(vrh, getu16_kern);
  903. }
  904. EXPORT_SYMBOL(vringh_need_notify_kern);
  905. #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
  /*
   * Output array handed to iotlb_translate(), together with its capacity.
   * Only one view of the union is meaningful for a given ring:
   * iotlb_translate() fills iovec entries when vrh->use_va is set and
   * bio_vec entries otherwise.
   */
  struct iotlb_vec {
          union {
                  struct iovec *iovec;    /* used when vrh->use_va */
                  struct bio_vec *bvec;   /* used when !vrh->use_va */
          } iov;
          size_t count;                   /* number of slots available */
  };
  913. static int iotlb_translate(const struct vringh *vrh,
  914. u64 addr, u64 len, u64 *translated,
  915. struct iotlb_vec *ivec, u32 perm)
  916. {
  917. struct vhost_iotlb_map *map;
  918. struct vhost_iotlb *iotlb = vrh->iotlb;
  919. int ret = 0;
  920. u64 s = 0, last = addr + len - 1;
  921. spin_lock(vrh->iotlb_lock);
  922. while (len > s) {
  923. uintptr_t io_addr;
  924. size_t io_len;
  925. u64 size;
  926. if (unlikely(ret >= ivec->count)) {
  927. ret = -ENOBUFS;
  928. break;
  929. }
  930. map = vhost_iotlb_itree_first(iotlb, addr, last);
  931. if (!map || map->start > addr) {
  932. ret = -EINVAL;
  933. break;
  934. } else if (!(map->perm & perm)) {
  935. ret = -EPERM;
  936. break;
  937. }
  938. size = map->size - addr + map->start;
  939. io_len = min(len - s, size);
  940. io_addr = map->addr - map->start + addr;
  941. if (vrh->use_va) {
  942. struct iovec *iovec = ivec->iov.iovec;
  943. iovec[ret].iov_len = io_len;
  944. iovec[ret].iov_base = (void __user *)io_addr;
  945. } else {
  946. u64 pfn = io_addr >> PAGE_SHIFT;
  947. struct bio_vec *bvec = ivec->iov.bvec;
  948. bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len,
  949. io_addr & (PAGE_SIZE - 1));
  950. }
  951. s += size;
  952. addr += size;
  953. ++ret;
  954. }
  955. spin_unlock(vrh->iotlb_lock);
  956. if (translated)
  957. *translated = min(len, s);
  958. return ret;
  959. }
  960. #define IOTLB_IOV_STRIDE 16
  961. static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
  962. void *src, size_t len)
  963. {
  964. struct iotlb_vec ivec;
  965. union {
  966. struct iovec iovec[IOTLB_IOV_STRIDE];
  967. struct bio_vec bvec[IOTLB_IOV_STRIDE];
  968. } iov;
  969. u64 total_translated = 0;
  970. ivec.iov.iovec = iov.iovec;
  971. ivec.count = IOTLB_IOV_STRIDE;
  972. while (total_translated < len) {
  973. struct iov_iter iter;
  974. u64 translated;
  975. int ret;
  976. size_t size;
  977. ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
  978. len - total_translated, &translated,
  979. &ivec, VHOST_MAP_RO);
  980. if (ret == -ENOBUFS)
  981. ret = IOTLB_IOV_STRIDE;
  982. else if (ret < 0)
  983. return ret;
  984. if (vrh->use_va) {
  985. iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
  986. translated);
  987. } else {
  988. iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
  989. translated);
  990. }
  991. size = copy_from_iter(dst, translated, &iter);
  992. if (size != translated)
  993. return -EFAULT;
  994. src += translated;
  995. dst += translated;
  996. total_translated += translated;
  997. }
  998. return total_translated;
  999. }
  1000. static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
  1001. void *src, size_t len)
  1002. {
  1003. struct iotlb_vec ivec;
  1004. union {
  1005. struct iovec iovec[IOTLB_IOV_STRIDE];
  1006. struct bio_vec bvec[IOTLB_IOV_STRIDE];
  1007. } iov;
  1008. u64 total_translated = 0;
  1009. ivec.iov.iovec = iov.iovec;
  1010. ivec.count = IOTLB_IOV_STRIDE;
  1011. while (total_translated < len) {
  1012. struct iov_iter iter;
  1013. u64 translated;
  1014. int ret;
  1015. size_t size;
  1016. ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
  1017. len - total_translated, &translated,
  1018. &ivec, VHOST_MAP_WO);
  1019. if (ret == -ENOBUFS)
  1020. ret = IOTLB_IOV_STRIDE;
  1021. else if (ret < 0)
  1022. return ret;
  1023. if (vrh->use_va) {
  1024. iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
  1025. translated);
  1026. } else {
  1027. iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
  1028. translated);
  1029. }
  1030. size = copy_to_iter(src, translated, &iter);
  1031. if (size != translated)
  1032. return -EFAULT;
  1033. src += translated;
  1034. dst += translated;
  1035. total_translated += translated;
  1036. }
  1037. return total_translated;
  1038. }
  1039. static inline int getu16_iotlb(const struct vringh *vrh,
  1040. u16 *val, const __virtio16 *p)
  1041. {
  1042. struct iotlb_vec ivec;
  1043. union {
  1044. struct iovec iovec[1];
  1045. struct bio_vec bvec[1];
  1046. } iov;
  1047. __virtio16 tmp;
  1048. int ret;
  1049. ivec.iov.iovec = iov.iovec;
  1050. ivec.count = 1;
  1051. /* Atomic read is needed for getu16 */
  1052. ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
  1053. NULL, &ivec, VHOST_MAP_RO);
  1054. if (ret < 0)
  1055. return ret;
  1056. if (vrh->use_va) {
  1057. ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
  1058. if (ret)
  1059. return ret;
  1060. } else {
  1061. __virtio16 *from = bvec_kmap_local(&ivec.iov.bvec[0]);
  1062. tmp = READ_ONCE(*from);
  1063. kunmap_local(from);
  1064. }
  1065. *val = vringh16_to_cpu(vrh, tmp);
  1066. return 0;
  1067. }
  1068. static inline int putu16_iotlb(const struct vringh *vrh,
  1069. __virtio16 *p, u16 val)
  1070. {
  1071. struct iotlb_vec ivec;
  1072. union {
  1073. struct iovec iovec;
  1074. struct bio_vec bvec;
  1075. } iov;
  1076. __virtio16 tmp;
  1077. int ret;
  1078. ivec.iov.iovec = &iov.iovec;
  1079. ivec.count = 1;
  1080. /* Atomic write is needed for putu16 */
  1081. ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
  1082. NULL, &ivec, VHOST_MAP_RO);
  1083. if (ret < 0)
  1084. return ret;
  1085. tmp = cpu_to_vringh16(vrh, val);
  1086. if (vrh->use_va) {
  1087. ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
  1088. if (ret)
  1089. return ret;
  1090. } else {
  1091. __virtio16 *to = bvec_kmap_local(&ivec.iov.bvec[0]);
  1092. WRITE_ONCE(*to, tmp);
  1093. kunmap_local(to);
  1094. }
  1095. return 0;
  1096. }
  1097. static inline int copydesc_iotlb(const struct vringh *vrh,
  1098. void *dst, const void *src, size_t len)
  1099. {
  1100. int ret;
  1101. ret = copy_from_iotlb(vrh, dst, (void *)src, len);
  1102. if (ret != len)
  1103. return -EFAULT;
  1104. return 0;
  1105. }
  1106. static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
  1107. void *dst, size_t len)
  1108. {
  1109. int ret;
  1110. ret = copy_from_iotlb(vrh, dst, src, len);
  1111. if (ret != len)
  1112. return -EFAULT;
  1113. return 0;
  1114. }
  1115. static inline int xfer_to_iotlb(const struct vringh *vrh,
  1116. void *dst, void *src, size_t len)
  1117. {
  1118. int ret;
  1119. ret = copy_to_iotlb(vrh, dst, src, len);
  1120. if (ret != len)
  1121. return -EFAULT;
  1122. return 0;
  1123. }
  1124. static inline int putused_iotlb(const struct vringh *vrh,
  1125. struct vring_used_elem *dst,
  1126. const struct vring_used_elem *src,
  1127. unsigned int num)
  1128. {
  1129. int size = num * sizeof(*dst);
  1130. int ret;
  1131. ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
  1132. if (ret != size)
  1133. return -EFAULT;
  1134. return 0;
  1135. }
  1136. /**
  1137. * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
  1138. * @vrh: the vringh to initialize.
  1139. * @features: the feature bits for this ring.
  1140. * @num: the number of elements.
  1141. * @weak_barriers: true if we only need memory barriers, not I/O.
  1142. * @desc: the userspace descriptor pointer.
  1143. * @avail: the userspace avail pointer.
  1144. * @used: the userspace used pointer.
  1145. *
  1146. * Returns an error if num is invalid.
  1147. */
  1148. int vringh_init_iotlb(struct vringh *vrh, u64 features,
  1149. unsigned int num, bool weak_barriers,
  1150. struct vring_desc *desc,
  1151. struct vring_avail *avail,
  1152. struct vring_used *used)
  1153. {
  1154. vrh->use_va = false;
  1155. return vringh_init_kern(vrh, features, num, weak_barriers,
  1156. desc, avail, used);
  1157. }
  1158. EXPORT_SYMBOL(vringh_init_iotlb);
  1159. /**
  1160. * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing
  1161. * user VA.
  1162. * @vrh: the vringh to initialize.
  1163. * @features: the feature bits for this ring.
  1164. * @num: the number of elements.
  1165. * @weak_barriers: true if we only need memory barriers, not I/O.
  1166. * @desc: the userspace descriptor pointer.
  1167. * @avail: the userspace avail pointer.
  1168. * @used: the userspace used pointer.
  1169. *
  1170. * Returns an error if num is invalid.
  1171. */
  1172. int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
  1173. unsigned int num, bool weak_barriers,
  1174. struct vring_desc *desc,
  1175. struct vring_avail *avail,
  1176. struct vring_used *used)
  1177. {
  1178. vrh->use_va = true;
  1179. return vringh_init_kern(vrh, features, num, weak_barriers,
  1180. desc, avail, used);
  1181. }
  1182. EXPORT_SYMBOL(vringh_init_iotlb_va);
  1183. /**
  1184. * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
  1185. * @vrh: the vring
  1186. * @iotlb: iotlb associated with this vring
  1187. * @iotlb_lock: spinlock to synchronize the iotlb accesses
  1188. */
  1189. void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
  1190. spinlock_t *iotlb_lock)
  1191. {
  1192. vrh->iotlb = iotlb;
  1193. vrh->iotlb_lock = iotlb_lock;
  1194. }
  1195. EXPORT_SYMBOL(vringh_set_iotlb);
  1196. /**
  1197. * vringh_getdesc_iotlb - get next available descriptor from ring with
  1198. * IOTLB.
  1199. * @vrh: the kernelspace vring.
  1200. * @riov: where to put the readable descriptors (or NULL)
  1201. * @wiov: where to put the writable descriptors (or NULL)
  1202. * @head: head index we received, for passing to vringh_complete_iotlb().
  1203. * @gfp: flags for allocating larger riov/wiov.
  1204. *
  1205. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  1206. *
  1207. * Note that on error return, you can tell the difference between an
  1208. * invalid ring and a single invalid descriptor: in the former case,
  1209. * *head will be vrh->vring.num. You may be able to ignore an invalid
  1210. * descriptor, but there's not much you can do with an invalid ring.
  1211. *
  1212. * Note that you can reuse riov and wiov with subsequent calls. Content is
  1213. * overwritten and memory reallocated if more space is needed.
  1214. * When you don't have to use riov and wiov anymore, you should clean up them
  1215. * calling vringh_kiov_cleanup() to release the memory, even on error!
  1216. */
  1217. int vringh_getdesc_iotlb(struct vringh *vrh,
  1218. struct vringh_kiov *riov,
  1219. struct vringh_kiov *wiov,
  1220. u16 *head,
  1221. gfp_t gfp)
  1222. {
  1223. int err;
  1224. err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
  1225. if (err < 0)
  1226. return err;
  1227. /* Empty... */
  1228. if (err == vrh->vring.num)
  1229. return 0;
  1230. *head = err;
  1231. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  1232. gfp, copydesc_iotlb);
  1233. if (err)
  1234. return err;
  1235. return 1;
  1236. }
  1237. EXPORT_SYMBOL(vringh_getdesc_iotlb);
  1238. /**
  1239. * vringh_iov_pull_iotlb - copy bytes from vring_iov.
  1240. * @vrh: the vring.
  1241. * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
  1242. * @dst: the place to copy.
  1243. * @len: the maximum length to copy.
  1244. *
  1245. * Returns the bytes copied <= len or a negative errno.
  1246. */
  1247. ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
  1248. struct vringh_kiov *riov,
  1249. void *dst, size_t len)
  1250. {
  1251. return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
  1252. }
  1253. EXPORT_SYMBOL(vringh_iov_pull_iotlb);
  1254. /**
  1255. * vringh_iov_push_iotlb - copy bytes into vring_iov.
  1256. * @vrh: the vring.
  1257. * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
  1258. * @src: the place to copy from.
  1259. * @len: the maximum length to copy.
  1260. *
  1261. * Returns the bytes copied <= len or a negative errno.
  1262. */
  1263. ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
  1264. struct vringh_kiov *wiov,
  1265. const void *src, size_t len)
  1266. {
  1267. return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
  1268. }
  1269. EXPORT_SYMBOL(vringh_iov_push_iotlb);
  1270. /**
  1271. * vringh_complete_iotlb - we've finished with descriptor, publish it.
  1272. * @vrh: the vring.
  1273. * @head: the head as filled in by vringh_getdesc_iotlb.
  1274. * @len: the length of data we have written.
  1275. *
  1276. * You should check vringh_need_notify_iotlb() after one or more calls
  1277. * to this function.
  1278. */
  1279. int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
  1280. {
  1281. struct vring_used_elem used;
  1282. used.id = cpu_to_vringh32(vrh, head);
  1283. used.len = cpu_to_vringh32(vrh, len);
  1284. return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
  1285. }
  1286. EXPORT_SYMBOL(vringh_complete_iotlb);
  1287. /**
  1288. * vringh_need_notify_iotlb - must we tell the other side about used buffers?
  1289. * @vrh: the vring we've called vringh_complete_iotlb() on.
  1290. *
  1291. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  1292. */
  1293. int vringh_need_notify_iotlb(struct vringh *vrh)
  1294. {
  1295. return __vringh_need_notify(vrh, getu16_iotlb);
  1296. }
  1297. EXPORT_SYMBOL(vringh_need_notify_iotlb);
  1298. #endif
  1299. MODULE_DESCRIPTION("host side of a virtio ring");
  1300. MODULE_LICENSE("GPL");