blk-map.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Functions related to mapping data to requests
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/sched/task_stack.h>
  7. #include <linux/module.h>
  8. #include <linux/bio.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/uio.h>
  11. #include "blk.h"
  12. struct bio_map_data {
  13. bool is_our_pages : 1;
  14. bool is_null_mapped : 1;
  15. struct iov_iter iter;
  16. struct iovec iov[];
  17. };
  18. static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
  19. gfp_t gfp_mask)
  20. {
  21. struct bio_map_data *bmd;
  22. if (data->nr_segs > UIO_MAXIOV)
  23. return NULL;
  24. bmd = kmalloc_flex(*bmd, iov, data->nr_segs, gfp_mask);
  25. if (!bmd)
  26. return NULL;
  27. bmd->iter = *data;
  28. if (iter_is_iovec(data)) {
  29. memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
  30. bmd->iter.__iov = bmd->iov;
  31. }
  32. return bmd;
  33. }
  34. static inline void blk_mq_map_bio_put(struct bio *bio)
  35. {
  36. bio_put(bio);
  37. }
  38. static struct bio *blk_rq_map_bio_alloc(struct request *rq,
  39. unsigned int nr_vecs, gfp_t gfp_mask)
  40. {
  41. struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL;
  42. struct bio *bio;
  43. bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask,
  44. &fs_bio_set);
  45. if (!bio)
  46. return NULL;
  47. return bio;
  48. }
  49. /**
  50. * bio_copy_from_iter - copy all pages from iov_iter to bio
  51. * @bio: The &struct bio which describes the I/O as destination
  52. * @iter: iov_iter as source
  53. *
  54. * Copy all pages from iov_iter to bio.
  55. * Returns 0 on success, or error on failure.
  56. */
  57. static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
  58. {
  59. struct bio_vec *bvec;
  60. struct bvec_iter_all iter_all;
  61. bio_for_each_segment_all(bvec, bio, iter_all) {
  62. ssize_t ret;
  63. ret = copy_page_from_iter(bvec->bv_page,
  64. bvec->bv_offset,
  65. bvec->bv_len,
  66. iter);
  67. if (!iov_iter_count(iter))
  68. break;
  69. if (ret < bvec->bv_len)
  70. return -EFAULT;
  71. }
  72. return 0;
  73. }
  74. /**
  75. * bio_copy_to_iter - copy all pages from bio to iov_iter
  76. * @bio: The &struct bio which describes the I/O as source
  77. * @iter: iov_iter as destination
  78. *
  79. * Copy all pages from bio to iov_iter.
  80. * Returns 0 on success, or error on failure.
  81. */
  82. static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
  83. {
  84. struct bio_vec *bvec;
  85. struct bvec_iter_all iter_all;
  86. bio_for_each_segment_all(bvec, bio, iter_all) {
  87. ssize_t ret;
  88. ret = copy_page_to_iter(bvec->bv_page,
  89. bvec->bv_offset,
  90. bvec->bv_len,
  91. &iter);
  92. if (!iov_iter_count(&iter))
  93. break;
  94. if (ret < bvec->bv_len)
  95. return -EFAULT;
  96. }
  97. return 0;
  98. }
  99. /**
  100. * bio_uncopy_user - finish previously mapped bio
  101. * @bio: bio being terminated
  102. *
  103. * Free pages allocated from bio_copy_user_iov() and write back data
  104. * to user space in case of a read.
  105. */
  106. static int bio_uncopy_user(struct bio *bio)
  107. {
  108. struct bio_map_data *bmd = bio->bi_private;
  109. int ret = 0;
  110. if (!bmd->is_null_mapped) {
  111. /*
  112. * if we're in a workqueue, the request is orphaned, so
  113. * don't copy into a random user address space, just free
  114. * and return -EINTR so user space doesn't expect any data.
  115. */
  116. if (!current->mm)
  117. ret = -EINTR;
  118. else if (bio_data_dir(bio) == READ)
  119. ret = bio_copy_to_iter(bio, bmd->iter);
  120. if (bmd->is_our_pages)
  121. bio_free_pages(bio);
  122. }
  123. kfree(bmd);
  124. return ret;
  125. }
  126. static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
  127. struct iov_iter *iter, gfp_t gfp_mask)
  128. {
  129. struct bio_map_data *bmd;
  130. struct page *page;
  131. struct bio *bio;
  132. int i = 0, ret;
  133. int nr_pages;
  134. unsigned int len = iter->count;
  135. unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
  136. bmd = bio_alloc_map_data(iter, gfp_mask);
  137. if (!bmd)
  138. return -ENOMEM;
  139. /*
  140. * We need to do a deep copy of the iov_iter including the iovecs.
  141. * The caller provided iov might point to an on-stack or otherwise
  142. * shortlived one.
  143. */
  144. bmd->is_our_pages = !map_data;
  145. bmd->is_null_mapped = (map_data && map_data->null_mapped);
  146. nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
  147. ret = -ENOMEM;
  148. bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
  149. if (!bio)
  150. goto out_bmd;
  151. if (map_data) {
  152. nr_pages = 1U << map_data->page_order;
  153. i = map_data->offset / PAGE_SIZE;
  154. }
  155. while (len) {
  156. unsigned int bytes = PAGE_SIZE;
  157. bytes -= offset;
  158. if (bytes > len)
  159. bytes = len;
  160. if (map_data) {
  161. if (i == map_data->nr_entries * nr_pages) {
  162. ret = -ENOMEM;
  163. goto cleanup;
  164. }
  165. page = map_data->pages[i / nr_pages];
  166. page += (i % nr_pages);
  167. i++;
  168. } else {
  169. page = alloc_page(GFP_NOIO | gfp_mask);
  170. if (!page) {
  171. ret = -ENOMEM;
  172. goto cleanup;
  173. }
  174. }
  175. if (bio_add_page(bio, page, bytes, offset) < bytes) {
  176. if (!map_data)
  177. __free_page(page);
  178. break;
  179. }
  180. len -= bytes;
  181. offset = 0;
  182. }
  183. if (map_data)
  184. map_data->offset += bio->bi_iter.bi_size;
  185. /*
  186. * success
  187. */
  188. if (iov_iter_rw(iter) == WRITE &&
  189. (!map_data || !map_data->null_mapped)) {
  190. ret = bio_copy_from_iter(bio, iter);
  191. if (ret)
  192. goto cleanup;
  193. } else if (map_data && map_data->from_user) {
  194. struct iov_iter iter2 = *iter;
  195. /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */
  196. iter2.data_source = ITER_SOURCE;
  197. ret = bio_copy_from_iter(bio, &iter2);
  198. if (ret)
  199. goto cleanup;
  200. } else {
  201. if (bmd->is_our_pages)
  202. zero_fill_bio(bio);
  203. iov_iter_advance(iter, bio->bi_iter.bi_size);
  204. }
  205. bio->bi_private = bmd;
  206. ret = blk_rq_append_bio(rq, bio);
  207. if (ret)
  208. goto cleanup;
  209. return 0;
  210. cleanup:
  211. if (!map_data)
  212. bio_free_pages(bio);
  213. blk_mq_map_bio_put(bio);
  214. out_bmd:
  215. kfree(bmd);
  216. return ret;
  217. }
  218. static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
  219. gfp_t gfp_mask)
  220. {
  221. unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
  222. struct bio *bio;
  223. int ret;
  224. if (!iov_iter_count(iter))
  225. return -EINVAL;
  226. bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
  227. if (!bio)
  228. return -ENOMEM;
  229. /*
  230. * No alignment requirements on our part to support arbitrary
  231. * passthrough commands.
  232. */
  233. ret = bio_iov_iter_get_pages(bio, iter, 0);
  234. if (ret)
  235. goto out_put;
  236. ret = blk_rq_append_bio(rq, bio);
  237. if (ret)
  238. goto out_release;
  239. return 0;
  240. out_release:
  241. bio_release_pages(bio, false);
  242. out_put:
  243. blk_mq_map_bio_put(bio);
  244. return ret;
  245. }
  246. static void bio_invalidate_vmalloc_pages(struct bio *bio)
  247. {
  248. #ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
  249. if (bio->bi_private && !op_is_write(bio_op(bio))) {
  250. unsigned long i, len = 0;
  251. for (i = 0; i < bio->bi_vcnt; i++)
  252. len += bio->bi_io_vec[i].bv_len;
  253. invalidate_kernel_vmap_range(bio->bi_private, len);
  254. }
  255. #endif
  256. }
  257. static void bio_map_kern_endio(struct bio *bio)
  258. {
  259. bio_invalidate_vmalloc_pages(bio);
  260. blk_mq_map_bio_put(bio);
  261. }
  262. static struct bio *bio_map_kern(struct request *rq, void *data, unsigned int len,
  263. gfp_t gfp_mask)
  264. {
  265. unsigned int nr_vecs = bio_add_max_vecs(data, len);
  266. struct bio *bio;
  267. bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
  268. if (!bio)
  269. return ERR_PTR(-ENOMEM);
  270. if (is_vmalloc_addr(data)) {
  271. bio->bi_private = data;
  272. if (!bio_add_vmalloc(bio, data, len)) {
  273. blk_mq_map_bio_put(bio);
  274. return ERR_PTR(-EINVAL);
  275. }
  276. } else {
  277. bio_add_virt_nofail(bio, data, len);
  278. }
  279. bio->bi_end_io = bio_map_kern_endio;
  280. return bio;
  281. }
  282. static void bio_copy_kern_endio(struct bio *bio)
  283. {
  284. bio_free_pages(bio);
  285. blk_mq_map_bio_put(bio);
  286. }
  287. static void bio_copy_kern_endio_read(struct bio *bio)
  288. {
  289. char *p = bio->bi_private;
  290. struct bio_vec *bvec;
  291. struct bvec_iter_all iter_all;
  292. bio_for_each_segment_all(bvec, bio, iter_all) {
  293. memcpy_from_bvec(p, bvec);
  294. p += bvec->bv_len;
  295. }
  296. bio_copy_kern_endio(bio);
  297. }
  298. /**
  299. * bio_copy_kern - copy kernel address into bio
  300. * @rq: request to fill
  301. * @data: pointer to buffer to copy
  302. * @len: length in bytes
  303. * @op: bio/request operation
  304. * @gfp_mask: allocation flags for bio and page allocation
  305. *
  306. * copy the kernel address into a bio suitable for io to a block
  307. * device. Returns an error pointer in case of error.
  308. */
  309. static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int len,
  310. gfp_t gfp_mask)
  311. {
  312. enum req_op op = req_op(rq);
  313. unsigned long kaddr = (unsigned long)data;
  314. unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  315. unsigned long start = kaddr >> PAGE_SHIFT;
  316. struct bio *bio;
  317. void *p = data;
  318. int nr_pages = 0;
  319. /*
  320. * Overflow, abort
  321. */
  322. if (end < start)
  323. return ERR_PTR(-EINVAL);
  324. nr_pages = end - start;
  325. bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
  326. if (!bio)
  327. return ERR_PTR(-ENOMEM);
  328. while (len) {
  329. struct page *page;
  330. unsigned int bytes = PAGE_SIZE;
  331. if (bytes > len)
  332. bytes = len;
  333. page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
  334. if (!page)
  335. goto cleanup;
  336. if (op_is_write(op))
  337. memcpy(page_address(page), p, bytes);
  338. __bio_add_page(bio, page, bytes, 0);
  339. len -= bytes;
  340. p += bytes;
  341. }
  342. if (op_is_write(op)) {
  343. bio->bi_end_io = bio_copy_kern_endio;
  344. } else {
  345. bio->bi_end_io = bio_copy_kern_endio_read;
  346. bio->bi_private = data;
  347. }
  348. return bio;
  349. cleanup:
  350. bio_free_pages(bio);
  351. blk_mq_map_bio_put(bio);
  352. return ERR_PTR(-ENOMEM);
  353. }
  354. /*
  355. * Append a bio to a passthrough request. Only works if the bio can be merged
  356. * into the request based on the driver constraints.
  357. */
  358. int blk_rq_append_bio(struct request *rq, struct bio *bio)
  359. {
  360. const struct queue_limits *lim = &rq->q->limits;
  361. unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
  362. unsigned int nr_segs = 0;
  363. int ret;
  364. /* check that the data layout matches the hardware restrictions */
  365. ret = bio_split_io_at(bio, lim, &nr_segs, max_bytes, 0);
  366. if (ret) {
  367. /* if we would have to split the bio, copy instead */
  368. if (ret > 0)
  369. ret = -EREMOTEIO;
  370. return ret;
  371. }
  372. if (rq->bio) {
  373. if (!ll_back_merge_fn(rq, bio, nr_segs))
  374. return -EINVAL;
  375. rq->phys_gap_bit = bio_seg_gap(rq->q, rq->biotail, bio,
  376. rq->phys_gap_bit);
  377. rq->biotail->bi_next = bio;
  378. rq->biotail = bio;
  379. rq->__data_len += bio->bi_iter.bi_size;
  380. bio_crypt_free_ctx(bio);
  381. return 0;
  382. }
  383. rq->nr_phys_segments = nr_segs;
  384. rq->bio = rq->biotail = bio;
  385. rq->__data_len = bio->bi_iter.bi_size;
  386. rq->phys_gap_bit = bio->bi_bvec_gap_bit;
  387. return 0;
  388. }
  389. EXPORT_SYMBOL(blk_rq_append_bio);
  390. /* Prepare bio for passthrough IO given ITER_BVEC iter */
  391. static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
  392. {
  393. unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
  394. struct bio *bio;
  395. int ret;
  396. if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
  397. return -EINVAL;
  398. /* reuse the bvecs from the iterator instead of allocating new ones */
  399. bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
  400. if (!bio)
  401. return -ENOMEM;
  402. bio_iov_bvec_set(bio, iter);
  403. ret = blk_rq_append_bio(rq, bio);
  404. if (ret)
  405. blk_mq_map_bio_put(bio);
  406. return ret;
  407. }
  408. /**
  409. * blk_rq_map_user_iov - map user data to a request, for passthrough requests
  410. * @q: request queue where request should be inserted
  411. * @rq: request to map data to
  412. * @map_data: pointer to the rq_map_data holding pages (if necessary)
  413. * @iter: iovec iterator
  414. * @gfp_mask: memory allocation flags
  415. *
  416. * Description:
  417. * Data will be mapped directly for zero copy I/O, if possible. Otherwise
  418. * a kernel bounce buffer is used.
  419. *
  420. * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  421. * still in process context.
  422. */
  423. int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
  424. struct rq_map_data *map_data,
  425. const struct iov_iter *iter, gfp_t gfp_mask)
  426. {
  427. bool copy = false, map_bvec = false;
  428. unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
  429. struct bio *bio = NULL;
  430. struct iov_iter i;
  431. int ret = -EINVAL;
  432. if (map_data)
  433. copy = true;
  434. else if (iov_iter_alignment(iter) & align)
  435. copy = true;
  436. else if (iov_iter_is_bvec(iter))
  437. map_bvec = true;
  438. else if (!user_backed_iter(iter))
  439. copy = true;
  440. else if (queue_virt_boundary(q))
  441. copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
  442. if (map_bvec) {
  443. ret = blk_rq_map_user_bvec(rq, iter);
  444. if (!ret)
  445. return 0;
  446. if (ret != -EREMOTEIO)
  447. goto fail;
  448. /* fall back to copying the data on limits mismatches */
  449. copy = true;
  450. }
  451. i = *iter;
  452. do {
  453. if (copy)
  454. ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
  455. else
  456. ret = bio_map_user_iov(rq, &i, gfp_mask);
  457. if (ret) {
  458. if (ret == -EREMOTEIO)
  459. ret = -EINVAL;
  460. goto unmap_rq;
  461. }
  462. if (!bio)
  463. bio = rq->bio;
  464. } while (iov_iter_count(&i));
  465. return 0;
  466. unmap_rq:
  467. blk_rq_unmap_user(bio);
  468. fail:
  469. rq->bio = NULL;
  470. return ret;
  471. }
  472. EXPORT_SYMBOL(blk_rq_map_user_iov);
  473. int blk_rq_map_user(struct request_queue *q, struct request *rq,
  474. struct rq_map_data *map_data, void __user *ubuf,
  475. unsigned long len, gfp_t gfp_mask)
  476. {
  477. struct iov_iter i;
  478. int ret = import_ubuf(rq_data_dir(rq), ubuf, len, &i);
  479. if (unlikely(ret < 0))
  480. return ret;
  481. return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
  482. }
  483. EXPORT_SYMBOL(blk_rq_map_user);
  484. int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
  485. void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
  486. bool vec, int iov_count, bool check_iter_count, int rw)
  487. {
  488. int ret = 0;
  489. if (vec) {
  490. struct iovec fast_iov[UIO_FASTIOV];
  491. struct iovec *iov = fast_iov;
  492. struct iov_iter iter;
  493. ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
  494. UIO_FASTIOV, &iov, &iter);
  495. if (ret < 0)
  496. return ret;
  497. if (iov_count) {
  498. /* SG_IO howto says that the shorter of the two wins */
  499. iov_iter_truncate(&iter, buf_len);
  500. if (check_iter_count && !iov_iter_count(&iter)) {
  501. kfree(iov);
  502. return -EINVAL;
  503. }
  504. }
  505. ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
  506. gfp_mask);
  507. kfree(iov);
  508. } else if (buf_len) {
  509. ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
  510. gfp_mask);
  511. }
  512. return ret;
  513. }
  514. EXPORT_SYMBOL(blk_rq_map_user_io);
  515. /**
  516. * blk_rq_unmap_user - unmap a request with user data
  517. * @bio: start of bio list
  518. *
  519. * Description:
  520. * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
  521. * supply the original rq->bio from the blk_rq_map_user() return, since
  522. * the I/O completion may have changed rq->bio.
  523. */
  524. int blk_rq_unmap_user(struct bio *bio)
  525. {
  526. struct bio *next_bio;
  527. int ret = 0, ret2;
  528. while (bio) {
  529. if (bio->bi_private) {
  530. ret2 = bio_uncopy_user(bio);
  531. if (ret2 && !ret)
  532. ret = ret2;
  533. } else {
  534. bio_release_pages(bio, bio_data_dir(bio) == READ);
  535. }
  536. if (bio_integrity(bio))
  537. bio_integrity_unmap_user(bio);
  538. next_bio = bio;
  539. bio = bio->bi_next;
  540. blk_mq_map_bio_put(next_bio);
  541. }
  542. return ret;
  543. }
  544. EXPORT_SYMBOL(blk_rq_unmap_user);
  545. /**
  546. * blk_rq_map_kern - map kernel data to a request, for passthrough requests
  547. * @rq: request to fill
  548. * @kbuf: the kernel buffer
  549. * @len: length of user data
  550. * @gfp_mask: memory allocation flags
  551. *
  552. * Description:
  553. * Data will be mapped directly if possible. Otherwise a bounce
  554. * buffer is used. Can be called multiple times to append multiple
  555. * buffers.
  556. */
  557. int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
  558. gfp_t gfp_mask)
  559. {
  560. unsigned long addr = (unsigned long) kbuf;
  561. struct bio *bio;
  562. int ret;
  563. if (len > (queue_max_hw_sectors(rq->q) << SECTOR_SHIFT))
  564. return -EINVAL;
  565. if (!len || !kbuf)
  566. return -EINVAL;
  567. if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
  568. bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
  569. else
  570. bio = bio_map_kern(rq, kbuf, len, gfp_mask);
  571. if (IS_ERR(bio))
  572. return PTR_ERR(bio);
  573. ret = blk_rq_append_bio(rq, bio);
  574. if (unlikely(ret))
  575. blk_mq_map_bio_put(bio);
  576. return ret;
  577. }
  578. EXPORT_SYMBOL(blk_rq_map_kern);