kbuf.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/kernel.h>
  3. #include <linux/errno.h>
  4. #include <linux/fs.h>
  5. #include <linux/file.h>
  6. #include <linux/mm.h>
  7. #include <linux/slab.h>
  8. #include <linux/namei.h>
  9. #include <linux/poll.h>
  10. #include <linux/vmalloc.h>
  11. #include <linux/io_uring.h>
  12. #include <uapi/linux/io_uring.h>
  13. #include "io_uring.h"
  14. #include "opdef.h"
  15. #include "kbuf.h"
  16. #include "memmap.h"
  /* A CQE reports the buffer ID in a 16-bit field, which caps the IDs per group */
  #define MAX_BIDS_PER_BGID (1 << 16)

  /*
   * Mapped buffer ring: evaluate to a pointer to the io_uring_buf slot selected
   * by @head once it has been masked with @mask. The whole expansion is
   * parenthesized so the result can be used inside a larger expression (for
   * example followed directly by ->member) without precedence surprises.
   */
  #define io_ring_head_to_buf(br, head, mask) (&(br)->bufs[(head) & (mask)])
  21. struct io_provide_buf {
  22. struct file *file;
  23. __u64 addr;
  24. __u32 len;
  25. __u32 bgid;
  26. __u32 nbufs;
  27. __u16 bid;
  28. };
  29. static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
  30. {
  31. /* No data consumed, return false early to avoid consuming the buffer */
  32. if (!len)
  33. return false;
  34. while (len) {
  35. struct io_uring_buf *buf;
  36. u32 buf_len, this_len;
  37. buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
  38. buf_len = READ_ONCE(buf->len);
  39. this_len = min_t(u32, len, buf_len);
  40. buf_len -= this_len;
  41. /* Stop looping for invalid buffer length of 0 */
  42. if (buf_len || !this_len) {
  43. WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
  44. WRITE_ONCE(buf->len, buf_len);
  45. return false;
  46. }
  47. WRITE_ONCE(buf->len, 0);
  48. bl->head++;
  49. len -= this_len;
  50. }
  51. return true;
  52. }
  53. bool io_kbuf_commit(struct io_kiocb *req,
  54. struct io_buffer_list *bl, int len, int nr)
  55. {
  56. if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
  57. return true;
  58. req->flags &= ~REQ_F_BUFFERS_COMMIT;
  59. if (unlikely(len < 0))
  60. return true;
  61. if (bl->flags & IOBL_INC)
  62. return io_kbuf_inc_commit(bl, len);
  63. bl->head += nr;
  64. return true;
  65. }
  66. static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
  67. unsigned int bgid)
  68. {
  69. lockdep_assert_held(&ctx->uring_lock);
  70. return xa_load(&ctx->io_bl_xa, bgid);
  71. }
  72. static int io_buffer_add_list(struct io_ring_ctx *ctx,
  73. struct io_buffer_list *bl, unsigned int bgid)
  74. {
  75. /*
  76. * Store buffer group ID and finally mark the list as visible.
  77. * The normal lookup doesn't care about the visibility as we're
  78. * always under the ->uring_lock, but lookups from mmap do.
  79. */
  80. bl->bgid = bgid;
  81. guard(mutex)(&ctx->mmap_lock);
  82. return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
  83. }
  84. void io_kbuf_drop_legacy(struct io_kiocb *req)
  85. {
  86. if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
  87. return;
  88. req->flags &= ~REQ_F_BUFFER_SELECTED;
  89. kfree(req->kbuf);
  90. req->kbuf = NULL;
  91. }
  92. bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
  93. {
  94. struct io_ring_ctx *ctx = req->ctx;
  95. struct io_buffer_list *bl;
  96. struct io_buffer *buf;
  97. io_ring_submit_lock(ctx, issue_flags);
  98. buf = req->kbuf;
  99. bl = io_buffer_get_list(ctx, buf->bgid);
  100. /*
  101. * If the buffer list was upgraded to a ring-based one, or removed,
  102. * while the request was in-flight in io-wq, drop it.
  103. */
  104. if (bl && !(bl->flags & IOBL_BUF_RING)) {
  105. list_add(&buf->list, &bl->buf_list);
  106. bl->nbufs++;
  107. } else {
  108. kfree(buf);
  109. }
  110. req->flags &= ~REQ_F_BUFFER_SELECTED;
  111. req->kbuf = NULL;
  112. io_ring_submit_unlock(ctx, issue_flags);
  113. return true;
  114. }
  115. static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
  116. struct io_buffer_list *bl)
  117. {
  118. if (!list_empty(&bl->buf_list)) {
  119. struct io_buffer *kbuf;
  120. kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
  121. list_del(&kbuf->list);
  122. bl->nbufs--;
  123. if (*len == 0 || *len > kbuf->len)
  124. *len = kbuf->len;
  125. if (list_empty(&bl->buf_list))
  126. req->flags |= REQ_F_BL_EMPTY;
  127. req->flags |= REQ_F_BUFFER_SELECTED;
  128. req->kbuf = kbuf;
  129. req->buf_index = kbuf->bid;
  130. return u64_to_user_ptr(kbuf->addr);
  131. }
  132. return NULL;
  133. }
  134. static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
  135. struct io_buffer_list *bl,
  136. struct iovec *iov)
  137. {
  138. void __user *buf;
  139. buf = io_provided_buffer_select(req, len, bl);
  140. if (unlikely(!buf))
  141. return -ENOBUFS;
  142. iov[0].iov_base = buf;
  143. iov[0].iov_len = *len;
  144. return 1;
  145. }
  146. static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
  147. {
  148. /*
  149. * If we came in unlocked, we have no choice but to consume the
  150. * buffer here, otherwise nothing ensures that the buffer won't
  151. * get used by others. This does mean it'll be pinned until the
  152. * IO completes, coming in unlocked means we're being called from
  153. * io-wq context and there may be further retries in async hybrid
  154. * mode. For the locked case, the caller must call commit when
  155. * the transfer completes (or if we get -EAGAIN and must poll of
  156. * retry).
  157. */
  158. if (issue_flags & IO_URING_F_UNLOCKED)
  159. return true;
  160. /* uring_cmd commits kbuf upfront, no need to auto-commit */
  161. if (!io_file_can_poll(req) && !io_is_uring_cmd(req))
  162. return true;
  163. return false;
  164. }
  165. static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
  166. struct io_buffer_list *bl,
  167. unsigned int issue_flags)
  168. {
  169. struct io_uring_buf_ring *br = bl->buf_ring;
  170. __u16 tail, head = bl->head;
  171. struct io_br_sel sel = { };
  172. struct io_uring_buf *buf;
  173. u32 buf_len;
  174. tail = smp_load_acquire(&br->tail);
  175. if (unlikely(tail == head))
  176. return sel;
  177. if (head + 1 == tail)
  178. req->flags |= REQ_F_BL_EMPTY;
  179. buf = io_ring_head_to_buf(br, head, bl->mask);
  180. buf_len = READ_ONCE(buf->len);
  181. if (*len == 0 || *len > buf_len)
  182. *len = buf_len;
  183. req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
  184. req->buf_index = READ_ONCE(buf->bid);
  185. sel.buf_list = bl;
  186. sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
  187. if (io_should_commit(req, issue_flags)) {
  188. if (!io_kbuf_commit(req, sel.buf_list, *len, 1))
  189. req->flags |= REQ_F_BUF_MORE;
  190. sel.buf_list = NULL;
  191. }
  192. return sel;
  193. }
  194. struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
  195. unsigned buf_group, unsigned int issue_flags)
  196. {
  197. struct io_ring_ctx *ctx = req->ctx;
  198. struct io_br_sel sel = { };
  199. struct io_buffer_list *bl;
  200. io_ring_submit_lock(req->ctx, issue_flags);
  201. bl = io_buffer_get_list(ctx, buf_group);
  202. if (likely(bl)) {
  203. if (bl->flags & IOBL_BUF_RING)
  204. sel = io_ring_buffer_select(req, len, bl, issue_flags);
  205. else
  206. sel.addr = io_provided_buffer_select(req, len, bl);
  207. }
  208. io_ring_submit_unlock(req->ctx, issue_flags);
  209. return sel;
  210. }
  211. /* cap it at a reasonable 256, will be one page even for 4K */
  212. #define PEEK_MAX_IMPORT 256
  213. static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
  214. struct io_buffer_list *bl)
  215. {
  216. struct io_uring_buf_ring *br = bl->buf_ring;
  217. struct iovec *iov = arg->iovs;
  218. int nr_iovs = arg->nr_iovs;
  219. __u16 nr_avail, tail, head;
  220. struct io_uring_buf *buf;
  221. tail = smp_load_acquire(&br->tail);
  222. head = bl->head;
  223. nr_avail = min_t(__u16, tail - head, UIO_MAXIOV);
  224. if (unlikely(!nr_avail))
  225. return -ENOBUFS;
  226. buf = io_ring_head_to_buf(br, head, bl->mask);
  227. if (arg->max_len) {
  228. u32 len = READ_ONCE(buf->len);
  229. size_t needed;
  230. if (unlikely(!len))
  231. return -ENOBUFS;
  232. needed = (arg->max_len + len - 1) / len;
  233. needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
  234. if (nr_avail > needed)
  235. nr_avail = needed;
  236. }
  237. /*
  238. * only alloc a bigger array if we know we have data to map, eg not
  239. * a speculative peek operation.
  240. */
  241. if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
  242. iov = kmalloc_objs(struct iovec, nr_avail);
  243. if (unlikely(!iov))
  244. return -ENOMEM;
  245. if (arg->mode & KBUF_MODE_FREE)
  246. kfree(arg->iovs);
  247. arg->iovs = iov;
  248. nr_iovs = nr_avail;
  249. } else if (nr_avail < nr_iovs) {
  250. nr_iovs = nr_avail;
  251. }
  252. /* set it to max, if not set, so we can use it unconditionally */
  253. if (!arg->max_len)
  254. arg->max_len = INT_MAX;
  255. req->buf_index = READ_ONCE(buf->bid);
  256. do {
  257. u32 len = READ_ONCE(buf->len);
  258. /* truncate end piece, if needed, for non partial buffers */
  259. if (len > arg->max_len) {
  260. len = arg->max_len;
  261. if (!(bl->flags & IOBL_INC)) {
  262. arg->partial_map = 1;
  263. if (iov != arg->iovs)
  264. break;
  265. WRITE_ONCE(buf->len, len);
  266. }
  267. }
  268. iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
  269. iov->iov_len = len;
  270. iov++;
  271. arg->out_len += len;
  272. arg->max_len -= len;
  273. if (!arg->max_len)
  274. break;
  275. buf = io_ring_head_to_buf(br, ++head, bl->mask);
  276. } while (--nr_iovs);
  277. if (head == tail)
  278. req->flags |= REQ_F_BL_EMPTY;
  279. req->flags |= REQ_F_BUFFER_RING;
  280. return iov - arg->iovs;
  281. }
  282. int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
  283. struct io_br_sel *sel, unsigned int issue_flags)
  284. {
  285. struct io_ring_ctx *ctx = req->ctx;
  286. int ret = -ENOENT;
  287. io_ring_submit_lock(ctx, issue_flags);
  288. sel->buf_list = io_buffer_get_list(ctx, arg->buf_group);
  289. if (unlikely(!sel->buf_list))
  290. goto out_unlock;
  291. if (sel->buf_list->flags & IOBL_BUF_RING) {
  292. ret = io_ring_buffers_peek(req, arg, sel->buf_list);
  293. /*
  294. * Don't recycle these buffers if we need to go through poll.
  295. * Nobody else can use them anyway, and holding on to provided
  296. * buffers for a send/write operation would happen on the app
  297. * side anyway with normal buffers. Besides, we already
  298. * committed them, they cannot be put back in the queue.
  299. */
  300. if (ret > 0) {
  301. req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
  302. if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret))
  303. req->flags |= REQ_F_BUF_MORE;
  304. }
  305. } else {
  306. ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs);
  307. }
  308. out_unlock:
  309. if (issue_flags & IO_URING_F_UNLOCKED) {
  310. sel->buf_list = NULL;
  311. mutex_unlock(&ctx->uring_lock);
  312. }
  313. return ret;
  314. }
  315. int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
  316. struct io_br_sel *sel)
  317. {
  318. struct io_ring_ctx *ctx = req->ctx;
  319. struct io_buffer_list *bl;
  320. int ret;
  321. lockdep_assert_held(&ctx->uring_lock);
  322. bl = io_buffer_get_list(ctx, arg->buf_group);
  323. if (unlikely(!bl))
  324. return -ENOENT;
  325. if (bl->flags & IOBL_BUF_RING) {
  326. ret = io_ring_buffers_peek(req, arg, bl);
  327. if (ret > 0)
  328. req->flags |= REQ_F_BUFFERS_COMMIT;
  329. sel->buf_list = bl;
  330. return ret;
  331. }
  332. /* don't support multiple buffer selections for legacy */
  333. sel->buf_list = NULL;
  334. return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
  335. }
  336. static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
  337. struct io_buffer_list *bl, int len, int nr)
  338. {
  339. bool ret = true;
  340. if (bl)
  341. ret = io_kbuf_commit(req, bl, len, nr);
  342. if (ret && (req->flags & REQ_F_BUF_MORE))
  343. ret = false;
  344. req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE);
  345. return ret;
  346. }
  347. unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
  348. int len, int nbufs)
  349. {
  350. unsigned int ret;
  351. ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
  352. if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
  353. io_kbuf_drop_legacy(req);
  354. return ret;
  355. }
  356. if (!__io_put_kbuf_ring(req, bl, len, nbufs))
  357. ret |= IORING_CQE_F_BUF_MORE;
  358. return ret;
  359. }
  360. static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
  361. struct io_buffer_list *bl,
  362. unsigned long nbufs)
  363. {
  364. unsigned long i = 0;
  365. struct io_buffer *nxt;
  366. /* protects io_buffers_cache */
  367. lockdep_assert_held(&ctx->uring_lock);
  368. WARN_ON_ONCE(bl->flags & IOBL_BUF_RING);
  369. for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
  370. nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
  371. list_del(&nxt->list);
  372. bl->nbufs--;
  373. kfree(nxt);
  374. cond_resched();
  375. }
  376. return i;
  377. }
  378. static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
  379. {
  380. if (bl->flags & IOBL_BUF_RING)
  381. io_free_region(ctx->user, &bl->region);
  382. else
  383. io_remove_buffers_legacy(ctx, bl, -1U);
  384. kfree(bl);
  385. }
  386. void io_destroy_buffers(struct io_ring_ctx *ctx)
  387. {
  388. struct io_buffer_list *bl;
  389. while (1) {
  390. unsigned long index = 0;
  391. scoped_guard(mutex, &ctx->mmap_lock) {
  392. bl = xa_find(&ctx->io_bl_xa, &index, ULONG_MAX, XA_PRESENT);
  393. if (bl)
  394. xa_erase(&ctx->io_bl_xa, bl->bgid);
  395. }
  396. if (!bl)
  397. break;
  398. io_put_bl(ctx, bl);
  399. }
  400. }
  401. static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
  402. {
  403. scoped_guard(mutex, &ctx->mmap_lock)
  404. WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
  405. io_put_bl(ctx, bl);
  406. }
  407. int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  408. {
  409. struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
  410. u64 tmp;
  411. if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
  412. sqe->splice_fd_in)
  413. return -EINVAL;
  414. tmp = READ_ONCE(sqe->fd);
  415. if (!tmp || tmp > MAX_BIDS_PER_BGID)
  416. return -EINVAL;
  417. memset(p, 0, sizeof(*p));
  418. p->nbufs = tmp;
  419. p->bgid = READ_ONCE(sqe->buf_group);
  420. return 0;
  421. }
  422. int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  423. {
  424. unsigned long size, tmp_check;
  425. struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
  426. u64 tmp;
  427. if (sqe->rw_flags || sqe->splice_fd_in)
  428. return -EINVAL;
  429. tmp = READ_ONCE(sqe->fd);
  430. if (!tmp || tmp > MAX_BIDS_PER_BGID)
  431. return -E2BIG;
  432. p->nbufs = tmp;
  433. p->addr = READ_ONCE(sqe->addr);
  434. p->len = READ_ONCE(sqe->len);
  435. if (!p->len)
  436. return -EINVAL;
  437. if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
  438. &size))
  439. return -EOVERFLOW;
  440. if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
  441. return -EOVERFLOW;
  442. if (!access_ok(u64_to_user_ptr(p->addr), size))
  443. return -EFAULT;
  444. p->bgid = READ_ONCE(sqe->buf_group);
  445. tmp = READ_ONCE(sqe->off);
  446. if (tmp > USHRT_MAX)
  447. return -E2BIG;
  448. if (tmp + p->nbufs > MAX_BIDS_PER_BGID)
  449. return -EINVAL;
  450. p->bid = tmp;
  451. return 0;
  452. }
  453. static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
  454. struct io_buffer_list *bl)
  455. {
  456. struct io_buffer *buf;
  457. u64 addr = pbuf->addr;
  458. int ret = -ENOMEM, i, bid = pbuf->bid;
  459. for (i = 0; i < pbuf->nbufs; i++) {
  460. /*
  461. * Nonsensical to have more than sizeof(bid) buffers in a
  462. * buffer list, as the application then has no way of knowing
  463. * which duplicate bid refers to what buffer.
  464. */
  465. if (bl->nbufs == USHRT_MAX) {
  466. ret = -EOVERFLOW;
  467. break;
  468. }
  469. buf = kmalloc_obj(*buf, GFP_KERNEL_ACCOUNT);
  470. if (!buf)
  471. break;
  472. list_add_tail(&buf->list, &bl->buf_list);
  473. bl->nbufs++;
  474. buf->addr = addr;
  475. buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
  476. buf->bid = bid;
  477. buf->bgid = pbuf->bgid;
  478. addr += pbuf->len;
  479. bid++;
  480. cond_resched();
  481. }
  482. return i ? 0 : ret;
  483. }
  484. static int __io_manage_buffers_legacy(struct io_kiocb *req,
  485. struct io_buffer_list *bl)
  486. {
  487. struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
  488. int ret;
  489. if (!bl) {
  490. if (req->opcode != IORING_OP_PROVIDE_BUFFERS)
  491. return -ENOENT;
  492. bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
  493. if (!bl)
  494. return -ENOMEM;
  495. INIT_LIST_HEAD(&bl->buf_list);
  496. ret = io_buffer_add_list(req->ctx, bl, p->bgid);
  497. if (ret) {
  498. kfree(bl);
  499. return ret;
  500. }
  501. }
  502. /* can't use provide/remove buffers command on mapped buffers */
  503. if (bl->flags & IOBL_BUF_RING)
  504. return -EINVAL;
  505. if (req->opcode == IORING_OP_PROVIDE_BUFFERS)
  506. return io_add_buffers(req->ctx, p, bl);
  507. return io_remove_buffers_legacy(req->ctx, bl, p->nbufs);
  508. }
  509. int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags)
  510. {
  511. struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
  512. struct io_ring_ctx *ctx = req->ctx;
  513. struct io_buffer_list *bl;
  514. int ret;
  515. io_ring_submit_lock(ctx, issue_flags);
  516. bl = io_buffer_get_list(ctx, p->bgid);
  517. ret = __io_manage_buffers_legacy(req, bl);
  518. io_ring_submit_unlock(ctx, issue_flags);
  519. if (ret < 0)
  520. req_set_fail(req);
  521. io_req_set_res(req, ret, 0);
  522. return IOU_COMPLETE;
  523. }
  524. int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
  525. {
  526. struct io_uring_buf_reg reg;
  527. struct io_buffer_list *bl;
  528. struct io_uring_region_desc rd;
  529. struct io_uring_buf_ring *br;
  530. unsigned long mmap_offset;
  531. unsigned long ring_size;
  532. int ret;
  533. lockdep_assert_held(&ctx->uring_lock);
  534. if (copy_from_user(&reg, arg, sizeof(reg)))
  535. return -EFAULT;
  536. if (!mem_is_zero(reg.resv, sizeof(reg.resv)))
  537. return -EINVAL;
  538. if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
  539. return -EINVAL;
  540. if (!is_power_of_2(reg.ring_entries))
  541. return -EINVAL;
  542. /* cannot disambiguate full vs empty due to head/tail size */
  543. if (reg.ring_entries >= 65536)
  544. return -EINVAL;
  545. bl = io_buffer_get_list(ctx, reg.bgid);
  546. if (bl) {
  547. /* if mapped buffer ring OR classic exists, don't allow */
  548. if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
  549. return -EEXIST;
  550. io_destroy_bl(ctx, bl);
  551. }
  552. bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
  553. if (!bl)
  554. return -ENOMEM;
  555. mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
  556. ring_size = flex_array_size(br, bufs, reg.ring_entries);
  557. memset(&rd, 0, sizeof(rd));
  558. rd.size = PAGE_ALIGN(ring_size);
  559. if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
  560. rd.user_addr = reg.ring_addr;
  561. rd.flags |= IORING_MEM_REGION_TYPE_USER;
  562. }
  563. ret = io_create_region(ctx, &bl->region, &rd, mmap_offset);
  564. if (ret)
  565. goto fail;
  566. br = io_region_get_ptr(&bl->region);
  567. #ifdef SHM_COLOUR
  568. /*
  569. * On platforms that have specific aliasing requirements, SHM_COLOUR
  570. * is set and we must guarantee that the kernel and user side align
  571. * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
  572. * the application mmap's the provided ring buffer. Fail the request
  573. * if we, by chance, don't end up with aligned addresses. The app
  574. * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
  575. * this transparently.
  576. */
  577. if (!(reg.flags & IOU_PBUF_RING_MMAP) &&
  578. ((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) {
  579. ret = -EINVAL;
  580. goto fail;
  581. }
  582. #endif
  583. bl->nr_entries = reg.ring_entries;
  584. bl->mask = reg.ring_entries - 1;
  585. bl->flags |= IOBL_BUF_RING;
  586. bl->buf_ring = br;
  587. if (reg.flags & IOU_PBUF_RING_INC)
  588. bl->flags |= IOBL_INC;
  589. ret = io_buffer_add_list(ctx, bl, reg.bgid);
  590. if (!ret)
  591. return 0;
  592. fail:
  593. io_free_region(ctx->user, &bl->region);
  594. kfree(bl);
  595. return ret;
  596. }
  597. int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
  598. {
  599. struct io_uring_buf_reg reg;
  600. struct io_buffer_list *bl;
  601. lockdep_assert_held(&ctx->uring_lock);
  602. if (copy_from_user(&reg, arg, sizeof(reg)))
  603. return -EFAULT;
  604. if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags)
  605. return -EINVAL;
  606. bl = io_buffer_get_list(ctx, reg.bgid);
  607. if (!bl)
  608. return -ENOENT;
  609. if (!(bl->flags & IOBL_BUF_RING))
  610. return -EINVAL;
  611. scoped_guard(mutex, &ctx->mmap_lock)
  612. xa_erase(&ctx->io_bl_xa, bl->bgid);
  613. io_put_bl(ctx, bl);
  614. return 0;
  615. }
  616. int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
  617. {
  618. struct io_uring_buf_status buf_status;
  619. struct io_buffer_list *bl;
  620. if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
  621. return -EFAULT;
  622. if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv)))
  623. return -EINVAL;
  624. bl = io_buffer_get_list(ctx, buf_status.buf_group);
  625. if (!bl)
  626. return -ENOENT;
  627. if (!(bl->flags & IOBL_BUF_RING))
  628. return -EINVAL;
  629. buf_status.head = bl->head;
  630. if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
  631. return -EFAULT;
  632. return 0;
  633. }
  634. struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
  635. unsigned int bgid)
  636. {
  637. struct io_buffer_list *bl;
  638. lockdep_assert_held(&ctx->mmap_lock);
  639. bl = xa_load(&ctx->io_bl_xa, bgid);
  640. if (!bl || !(bl->flags & IOBL_BUF_RING))
  641. return NULL;
  642. return &bl->region;
  643. }