blk-mq-dma.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2025 Christoph Hellwig
  4. */
  5. #include <linux/blk-integrity.h>
  6. #include <linux/blk-mq-dma.h>
  7. #include "blk.h"
  8. static bool __blk_map_iter_next(struct blk_map_iter *iter)
  9. {
  10. if (iter->iter.bi_size)
  11. return true;
  12. if (!iter->bio || !iter->bio->bi_next)
  13. return false;
  14. iter->bio = iter->bio->bi_next;
  15. if (iter->is_integrity) {
  16. iter->iter = bio_integrity(iter->bio)->bip_iter;
  17. iter->bvecs = bio_integrity(iter->bio)->bip_vec;
  18. } else {
  19. iter->iter = iter->bio->bi_iter;
  20. iter->bvecs = iter->bio->bi_io_vec;
  21. }
  22. return true;
  23. }
  24. static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
  25. struct phys_vec *vec)
  26. {
  27. unsigned int max_size;
  28. struct bio_vec bv;
  29. if (!iter->iter.bi_size)
  30. return false;
  31. bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
  32. vec->paddr = bvec_phys(&bv);
  33. max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
  34. bv.bv_len = min(bv.bv_len, max_size);
  35. bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);
  36. /*
  37. * If we are entirely done with this bi_io_vec entry, check if the next
  38. * one could be merged into it. This typically happens when moving to
  39. * the next bio, but some callers also don't pack bvecs tight.
  40. */
  41. while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
  42. struct bio_vec next;
  43. if (!__blk_map_iter_next(iter))
  44. break;
  45. next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
  46. if (bv.bv_len + next.bv_len > max_size ||
  47. !biovec_phys_mergeable(req->q, &bv, &next))
  48. break;
  49. bv.bv_len += next.bv_len;
  50. bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
  51. }
  52. vec->len = bv.bv_len;
  53. return true;
  54. }
  55. /*
  56. * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
  57. * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
  58. * we need to ensure our segments are aligned to this as well.
  59. *
  60. * Note that there is no point in using the slightly more complicated IOVA based
  61. * path for single segment mappings.
  62. */
  63. static inline bool blk_can_dma_map_iova(struct request *req,
  64. struct device *dma_dev)
  65. {
  66. return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
  67. }
  68. static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
  69. {
  70. iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
  71. iter->len = vec->len;
  72. return true;
  73. }
  74. static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
  75. struct blk_dma_iter *iter, struct phys_vec *vec)
  76. {
  77. unsigned int attrs = 0;
  78. if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
  79. attrs |= DMA_ATTR_MMIO;
  80. iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
  81. rq_dma_dir(req), attrs);
  82. if (dma_mapping_error(dma_dev, iter->addr)) {
  83. iter->status = BLK_STS_RESOURCE;
  84. return false;
  85. }
  86. iter->len = vec->len;
  87. return true;
  88. }
  89. static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
  90. struct dma_iova_state *state, struct blk_dma_iter *iter,
  91. struct phys_vec *vec)
  92. {
  93. enum dma_data_direction dir = rq_dma_dir(req);
  94. unsigned int attrs = 0;
  95. size_t mapped = 0;
  96. int error;
  97. iter->addr = state->addr;
  98. iter->len = dma_iova_size(state);
  99. if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
  100. attrs |= DMA_ATTR_MMIO;
  101. do {
  102. error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
  103. vec->len, dir, attrs);
  104. if (error)
  105. goto out_unlink;
  106. mapped += vec->len;
  107. } while (blk_map_iter_next(req, &iter->iter, vec));
  108. error = dma_iova_sync(dma_dev, state, 0, mapped);
  109. if (error)
  110. goto out_unlink;
  111. return true;
  112. out_unlink:
  113. dma_iova_destroy(dma_dev, state, mapped, dir, attrs);
  114. iter->status = errno_to_blk_status(error);
  115. return false;
  116. }
  117. static inline void blk_rq_map_iter_init(struct request *rq,
  118. struct blk_map_iter *iter)
  119. {
  120. struct bio *bio = rq->bio;
  121. if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
  122. *iter = (struct blk_map_iter) {
  123. .bvecs = &rq->special_vec,
  124. .iter = {
  125. .bi_size = rq->special_vec.bv_len,
  126. }
  127. };
  128. } else if (bio) {
  129. *iter = (struct blk_map_iter) {
  130. .bio = bio,
  131. .bvecs = bio->bi_io_vec,
  132. .iter = bio->bi_iter,
  133. };
  134. } else {
  135. /* the internal flush request may not have bio attached */
  136. *iter = (struct blk_map_iter) {};
  137. }
  138. }
  139. static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
  140. struct dma_iova_state *state, struct blk_dma_iter *iter,
  141. unsigned int total_len)
  142. {
  143. struct phys_vec vec;
  144. memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
  145. iter->status = BLK_STS_OK;
  146. iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;
  147. /*
  148. * Grab the first segment ASAP because we'll need it to check for P2P
  149. * transfers.
  150. */
  151. if (!blk_map_iter_next(req, &iter->iter, &vec))
  152. return false;
  153. switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
  154. phys_to_page(vec.paddr))) {
  155. case PCI_P2PDMA_MAP_BUS_ADDR:
  156. return blk_dma_map_bus(iter, &vec);
  157. case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
  158. /*
  159. * P2P transfers through the host bridge are treated the
  160. * same as non-P2P transfers below and during unmap.
  161. */
  162. case PCI_P2PDMA_MAP_NONE:
  163. break;
  164. default:
  165. iter->status = BLK_STS_INVAL;
  166. return false;
  167. }
  168. if (blk_can_dma_map_iova(req, dma_dev) &&
  169. dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
  170. return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
  171. memset(state, 0, sizeof(*state));
  172. return blk_dma_map_direct(req, dma_dev, iter, &vec);
  173. }
  174. /**
  175. * blk_rq_dma_map_iter_start - map the first DMA segment for a request
  176. * @req: request to map
  177. * @dma_dev: device to map to
  178. * @state: DMA IOVA state
  179. * @iter: block layer DMA iterator
  180. *
  181. * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
  182. * caller and don't need to be initialized. @state needs to be stored for use
  183. * at unmap time, @iter is only needed at map time.
  184. *
  185. * Returns %false if there is no segment to map, including due to an error, or
  186. * %true ft it did map a segment.
  187. *
  188. * If a segment was mapped, the DMA address for it is returned in @iter.addr and
  189. * the length in @iter.len. If no segment was mapped the status code is
  190. * returned in @iter.status.
  191. *
  192. * The caller can call blk_rq_dma_map_coalesce() to check if further segments
  193. * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
  194. * to try to map the following segments.
  195. */
  196. bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
  197. struct dma_iova_state *state, struct blk_dma_iter *iter)
  198. {
  199. blk_rq_map_iter_init(req, &iter->iter);
  200. return blk_dma_map_iter_start(req, dma_dev, state, iter,
  201. blk_rq_payload_bytes(req));
  202. }
  203. EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
  204. /**
  205. * blk_rq_dma_map_iter_next - map the next DMA segment for a request
  206. * @req: request to map
  207. * @dma_dev: device to map to
  208. * @iter: block layer DMA iterator
  209. *
  210. * Iterate to the next mapping after a previous call to
  211. * blk_rq_dma_map_iter_start(). See there for a detailed description of the
  212. * arguments.
  213. *
  214. * Returns %false if there is no segment to map, including due to an error, or
  215. * %true ft it did map a segment.
  216. *
  217. * If a segment was mapped, the DMA address for it is returned in @iter.addr and
  218. * the length in @iter.len. If no segment was mapped the status code is
  219. * returned in @iter.status.
  220. */
  221. bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
  222. struct blk_dma_iter *iter)
  223. {
  224. struct phys_vec vec;
  225. if (!blk_map_iter_next(req, &iter->iter, &vec))
  226. return false;
  227. if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
  228. return blk_dma_map_bus(iter, &vec);
  229. return blk_dma_map_direct(req, dma_dev, iter, &vec);
  230. }
  231. EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
  /*
   * Return the scatterlist entry to fill next: the head of @sglist when no
   * entry has been used yet (*@sg is NULL), otherwise the entry following *@sg.
   */
  static inline struct scatterlist *
  blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
  {
      struct scatterlist *prev = *sg;

      if (prev) {
          /*
           * If the driver previously mapped a shorter list, we could see
           * a termination bit prematurely unless it fully inits the sg
           * table on each mapping.  We KNOW that there must be more
           * entries here or the driver would be buggy, so force clear
           * the termination bit to avoid doing a full sg_init_table()
           * in drivers for each command.
           */
          sg_unmark_end(prev);
          return sg_next(prev);
      }

      return sglist;
  }
  247. /*
  248. * Map a request to scatterlist, return number of sg entries setup. Caller
  249. * must make sure sg can hold rq->nr_phys_segments entries.
  250. */
  251. int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
  252. struct scatterlist **last_sg)
  253. {
  254. struct blk_map_iter iter;
  255. struct phys_vec vec;
  256. int nsegs = 0;
  257. blk_rq_map_iter_init(rq, &iter);
  258. while (blk_map_iter_next(rq, &iter, &vec)) {
  259. *last_sg = blk_next_sg(last_sg, sglist);
  260. WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
  261. sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
  262. offset_in_page(vec.paddr));
  263. nsegs++;
  264. }
  265. if (*last_sg)
  266. sg_mark_end(*last_sg);
  267. /*
  268. * Something must have been wrong if the figured number of
  269. * segment is bigger than number of req's physical segments
  270. */
  271. WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
  272. return nsegs;
  273. }
  274. EXPORT_SYMBOL(__blk_rq_map_sg);
  275. #ifdef CONFIG_BLK_DEV_INTEGRITY
  276. /**
  277. * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
  278. * for a request
  279. * @req: request to map
  280. * @dma_dev: device to map to
  281. * @state: DMA IOVA state
  282. * @iter: block layer DMA iterator
  283. *
  284. * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are
  285. * provided by the caller and don't need to be initialized. @state needs to be
  286. * stored for use at unmap time, @iter is only needed at map time.
  287. *
  288. * Returns %false if there is no segment to map, including due to an error, or
  289. * %true if it did map a segment.
  290. *
  291. * If a segment was mapped, the DMA address for it is returned in @iter.addr
  292. * and the length in @iter.len. If no segment was mapped the status code is
  293. * returned in @iter.status.
  294. *
  295. * The caller can call blk_rq_dma_map_coalesce() to check if further segments
  296. * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
  297. * to try to map the following segments.
  298. */
  299. bool blk_rq_integrity_dma_map_iter_start(struct request *req,
  300. struct device *dma_dev, struct dma_iova_state *state,
  301. struct blk_dma_iter *iter)
  302. {
  303. unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
  304. blk_rq_sectors(req));
  305. struct bio *bio = req->bio;
  306. iter->iter = (struct blk_map_iter) {
  307. .bio = bio,
  308. .iter = bio_integrity(bio)->bip_iter,
  309. .bvecs = bio_integrity(bio)->bip_vec,
  310. .is_integrity = true,
  311. };
  312. return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
  313. }
  314. EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);
  315. /**
  316. * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
  317. * a request
  318. * @req: request to map
  319. * @dma_dev: device to map to
  320. * @state: DMA IOVA state
  321. * @iter: block layer DMA iterator
  322. *
  323. * Iterate to the next integrity mapping after a previous call to
  324. * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description
  325. * of the arguments.
  326. *
  327. * Returns %false if there is no segment to map, including due to an error, or
  328. * %true if it did map a segment.
  329. *
  330. * If a segment was mapped, the DMA address for it is returned in @iter.addr and
  331. * the length in @iter.len. If no segment was mapped the status code is
  332. * returned in @iter.status.
  333. */
  334. bool blk_rq_integrity_dma_map_iter_next(struct request *req,
  335. struct device *dma_dev, struct blk_dma_iter *iter)
  336. {
  337. struct phys_vec vec;
  338. if (!blk_map_iter_next(req, &iter->iter, &vec))
  339. return false;
  340. if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
  341. return blk_dma_map_bus(iter, &vec);
  342. return blk_dma_map_direct(req, dma_dev, iter, &vec);
  343. }
  344. EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
  345. /**
  346. * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
  347. * @rq: request to map
  348. * @sglist: target scatterlist
  349. *
  350. * Description: Map the integrity vectors in request into a
  351. * scatterlist. The scatterlist must be big enough to hold all
  352. * elements. I.e. sized using blk_rq_count_integrity_sg() or
  353. * rq->nr_integrity_segments.
  354. */
  355. int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
  356. {
  357. struct request_queue *q = rq->q;
  358. struct scatterlist *sg = NULL;
  359. struct bio *bio = rq->bio;
  360. unsigned int segments = 0;
  361. struct phys_vec vec;
  362. struct blk_map_iter iter = {
  363. .bio = bio,
  364. .iter = bio_integrity(bio)->bip_iter,
  365. .bvecs = bio_integrity(bio)->bip_vec,
  366. .is_integrity = true,
  367. };
  368. while (blk_map_iter_next(rq, &iter, &vec)) {
  369. sg = blk_next_sg(&sg, sglist);
  370. WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
  371. sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
  372. offset_in_page(vec.paddr));
  373. segments++;
  374. }
  375. if (sg)
  376. sg_mark_end(sg);
  377. /*
  378. * Something must have been wrong if the figured number of segment
  379. * is bigger than number of req's physical integrity segments
  380. */
  381. BUG_ON(segments > rq->nr_integrity_segments);
  382. BUG_ON(segments > queue_max_integrity_segments(q));
  383. return segments;
  384. }
  385. EXPORT_SYMBOL(blk_rq_map_integrity_sg);
  386. #endif