blk-merge.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Functions related to segment and merge handling
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/module.h>
  7. #include <linux/bio.h>
  8. #include <linux/blkdev.h>
  9. #include <linux/blk-integrity.h>
  10. #include <linux/part_stat.h>
  11. #include <linux/blk-cgroup.h>
  12. #include <trace/events/block.h>
  13. #include "blk.h"
  14. #include "blk-mq-sched.h"
  15. #include "blk-rq-qos.h"
  16. #include "blk-throttle.h"
  17. static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
  18. {
  19. *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
  20. }
  21. static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
  22. {
  23. struct bvec_iter iter = bio->bi_iter;
  24. int idx;
  25. bio_get_first_bvec(bio, bv);
  26. if (bv->bv_len == bio->bi_iter.bi_size)
  27. return; /* this bio only has a single bvec */
  28. bio_advance_iter(bio, &iter, iter.bi_size);
  29. if (!iter.bi_bvec_done)
  30. idx = iter.bi_idx - 1;
  31. else /* in the middle of bvec */
  32. idx = iter.bi_idx;
  33. *bv = bio->bi_io_vec[idx];
  34. /*
  35. * iter.bi_bvec_done records actual length of the last bvec
  36. * if this bio ends in the middle of one io vector
  37. */
  38. if (iter.bi_bvec_done)
  39. bv->bv_len = iter.bi_bvec_done;
  40. }
  41. static inline bool bio_will_gap(struct request_queue *q,
  42. struct request *prev_rq, struct bio *prev, struct bio *next)
  43. {
  44. struct bio_vec pb, nb;
  45. if (!bio_has_data(prev) || !queue_virt_boundary(q))
  46. return false;
  47. /*
  48. * Don't merge if the 1st bio starts with non-zero offset, otherwise it
  49. * is quite difficult to respect the sg gap limit. We work hard to
  50. * merge a huge number of small single bios in case of mkfs.
  51. */
  52. if (prev_rq)
  53. bio_get_first_bvec(prev_rq->bio, &pb);
  54. else
  55. bio_get_first_bvec(prev, &pb);
  56. if (pb.bv_offset & queue_virt_boundary(q))
  57. return true;
  58. /*
  59. * We don't need to worry about the situation that the merged segment
  60. * ends in unaligned virt boundary:
  61. *
  62. * - if 'pb' ends aligned, the merged segment ends aligned
  63. * - if 'pb' ends unaligned, the next bio must include
  64. * one single bvec of 'nb', otherwise the 'nb' can't
  65. * merge with 'pb'
  66. */
  67. bio_get_last_bvec(prev, &pb);
  68. bio_get_first_bvec(next, &nb);
  69. if (biovec_phys_mergeable(q, &pb, &nb))
  70. return false;
  71. return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
  72. }
  73. static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
  74. {
  75. return bio_will_gap(req->q, req, req->biotail, bio);
  76. }
  77. static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
  78. {
  79. return bio_will_gap(req->q, NULL, bio, req->bio);
  80. }
  81. /*
  82. * The maximum size that a bio can fit has to be aligned down to the
  83. * logical block size, which is the minimum accepted unit by hardware.
  84. */
  85. static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
  86. {
  87. return round_down(BIO_MAX_SIZE, lim->logical_block_size) >>
  88. SECTOR_SHIFT;
  89. }
  90. /*
  91. * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
  92. * @bio: the original bio to be submitted and split
  93. * @split_sectors: the sector count at which to split
  94. * @bs: the bio set used for allocating the new split bio
  95. *
  96. * The original bio is modified to contain the remaining sectors and submitted.
  97. * The caller is responsible for submitting the returned bio.
  98. *
  99. * If succeed, the newly allocated bio representing the initial part will be
  100. * returned, on failure NULL will be returned and original bio will fail.
  101. */
  102. struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
  103. struct bio_set *bs)
  104. {
  105. struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
  106. if (IS_ERR(split)) {
  107. bio->bi_status = errno_to_blk_status(PTR_ERR(split));
  108. bio_endio(bio);
  109. return NULL;
  110. }
  111. bio_chain(split, bio);
  112. trace_block_split(split, bio->bi_iter.bi_sector);
  113. WARN_ON_ONCE(bio_zone_write_plugging(bio));
  114. if (should_fail_bio(bio))
  115. bio_io_error(bio);
  116. else if (!blk_throtl_bio(bio))
  117. submit_bio_noacct_nocheck(bio, true);
  118. return split;
  119. }
  120. EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
  121. static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
  122. {
  123. if (unlikely(split_sectors < 0)) {
  124. bio->bi_status = errno_to_blk_status(split_sectors);
  125. bio_endio(bio);
  126. return NULL;
  127. }
  128. if (split_sectors) {
  129. bio = bio_submit_split_bioset(bio, split_sectors,
  130. &bio->bi_bdev->bd_disk->bio_split);
  131. if (bio)
  132. bio->bi_opf |= REQ_NOMERGE;
  133. }
  134. return bio;
  135. }
  136. static struct bio *__bio_split_discard(struct bio *bio,
  137. const struct queue_limits *lim, unsigned *nsegs,
  138. unsigned int max_sectors)
  139. {
  140. unsigned int max_discard_sectors, granularity;
  141. sector_t tmp;
  142. unsigned split_sectors;
  143. *nsegs = 1;
  144. granularity = max(lim->discard_granularity >> 9, 1U);
  145. max_discard_sectors = min(max_sectors, bio_allowed_max_sectors(lim));
  146. max_discard_sectors -= max_discard_sectors % granularity;
  147. if (unlikely(!max_discard_sectors))
  148. return bio;
  149. if (bio_sectors(bio) <= max_discard_sectors)
  150. return bio;
  151. split_sectors = max_discard_sectors;
  152. /*
  153. * If the next starting sector would be misaligned, stop the discard at
  154. * the previous aligned sector.
  155. */
  156. tmp = bio->bi_iter.bi_sector + split_sectors -
  157. ((lim->discard_alignment >> 9) % granularity);
  158. tmp = sector_div(tmp, granularity);
  159. if (split_sectors > tmp)
  160. split_sectors -= tmp;
  161. return bio_submit_split(bio, split_sectors);
  162. }
  163. struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
  164. unsigned *nsegs)
  165. {
  166. unsigned int max_sectors;
  167. if (bio_op(bio) == REQ_OP_SECURE_ERASE)
  168. max_sectors = lim->max_secure_erase_sectors;
  169. else
  170. max_sectors = lim->max_discard_sectors;
  171. return __bio_split_discard(bio, lim, nsegs, max_sectors);
  172. }
  173. static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
  174. bool is_atomic)
  175. {
  176. /*
  177. * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
  178. * both non-zero.
  179. */
  180. if (is_atomic && lim->atomic_write_boundary_sectors)
  181. return lim->atomic_write_boundary_sectors;
  182. return lim->chunk_sectors;
  183. }
  184. /*
  185. * Return the maximum number of sectors from the start of a bio that may be
  186. * submitted as a single request to a block device. If enough sectors remain,
  187. * align the end to the physical block size. Otherwise align the end to the
  188. * logical block size. This approach minimizes the number of non-aligned
  189. * requests that are submitted to a block device if the start of a bio is not
  190. * aligned to a physical block boundary.
  191. */
  192. static inline unsigned get_max_io_size(struct bio *bio,
  193. const struct queue_limits *lim)
  194. {
  195. unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
  196. unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
  197. bool is_atomic = bio->bi_opf & REQ_ATOMIC;
  198. unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
  199. unsigned max_sectors, start, end;
  200. /*
  201. * We ignore lim->max_sectors for atomic writes because it may less
  202. * than the actual bio size, which we cannot tolerate.
  203. */
  204. if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
  205. max_sectors = lim->max_write_zeroes_sectors;
  206. else if (is_atomic)
  207. max_sectors = lim->atomic_write_max_sectors;
  208. else
  209. max_sectors = lim->max_sectors;
  210. if (boundary_sectors) {
  211. max_sectors = min(max_sectors,
  212. blk_boundary_sectors_left(bio->bi_iter.bi_sector,
  213. boundary_sectors));
  214. }
  215. start = bio->bi_iter.bi_sector & (pbs - 1);
  216. end = (start + max_sectors) & ~(pbs - 1);
  217. if (end > start)
  218. return end - start;
  219. return max_sectors & ~(lbs - 1);
  220. }
  221. /**
  222. * bvec_split_segs - verify whether or not a bvec should be split in the middle
  223. * @lim: [in] queue limits to split based on
  224. * @bv: [in] bvec to examine
  225. * @nsegs: [in,out] Number of segments in the bio being built. Incremented
  226. * by the number of segments from @bv that may be appended to that
  227. * bio without exceeding @max_segs
  228. * @bytes: [in,out] Number of bytes in the bio being built. Incremented
  229. * by the number of bytes from @bv that may be appended to that
  230. * bio without exceeding @max_bytes
  231. * @max_segs: [in] upper bound for *@nsegs
  232. * @max_bytes: [in] upper bound for *@bytes
  233. *
  234. * When splitting a bio, it can happen that a bvec is encountered that is too
  235. * big to fit in a single segment and hence that it has to be split in the
  236. * middle. This function verifies whether or not that should happen. The value
  237. * %true is returned if and only if appending the entire @bv to a bio with
  238. * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
  239. * the block driver.
  240. */
  241. static bool bvec_split_segs(const struct queue_limits *lim,
  242. const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
  243. unsigned max_segs, unsigned max_bytes)
  244. {
  245. unsigned max_len = max_bytes - *bytes;
  246. unsigned len = min(bv->bv_len, max_len);
  247. unsigned total_len = 0;
  248. unsigned seg_size = 0;
  249. while (len && *nsegs < max_segs) {
  250. seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
  251. (*nsegs)++;
  252. total_len += seg_size;
  253. len -= seg_size;
  254. if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
  255. break;
  256. }
  257. *bytes += total_len;
  258. /* tell the caller to split the bvec if it is too big to fit */
  259. return len > 0 || bv->bv_len > max_len;
  260. }
  261. static unsigned int bio_split_alignment(struct bio *bio,
  262. const struct queue_limits *lim)
  263. {
  264. if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
  265. return lim->zone_write_granularity;
  266. return lim->logical_block_size;
  267. }
  268. static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
  269. struct bio_vec *bv)
  270. {
  271. return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
  272. }
  273. /**
  274. * bio_split_io_at - check if and where to split a bio
  275. * @bio: [in] bio to be split
  276. * @lim: [in] queue limits to split based on
  277. * @segs: [out] number of segments in the bio with the first half of the sectors
  278. * @max_bytes: [in] maximum number of bytes per bio
  279. * @len_align_mask: [in] length alignment mask for each vector
  280. *
  281. * Find out if @bio needs to be split to fit the queue limits in @lim and a
  282. * maximum size of @max_bytes. Returns a negative error number if @bio can't be
  283. * split, 0 if the bio doesn't have to be split, or a positive sector offset if
  284. * @bio needs to be split.
  285. */
  286. int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
  287. unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
  288. {
  289. struct bio_crypt_ctx *bc = bio_crypt_ctx(bio);
  290. struct bio_vec bv, bvprv, *bvprvp = NULL;
  291. unsigned nsegs = 0, bytes = 0, gaps = 0;
  292. struct bvec_iter iter;
  293. unsigned start_align_mask = lim->dma_alignment;
  294. if (bc) {
  295. start_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
  296. len_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
  297. }
  298. bio_for_each_bvec(bv, bio, iter) {
  299. if (bv.bv_offset & start_align_mask ||
  300. bv.bv_len & len_align_mask)
  301. return -EINVAL;
  302. /*
  303. * If the queue doesn't support SG gaps and adding this
  304. * offset would create a gap, disallow it.
  305. */
  306. if (bvprvp) {
  307. if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
  308. goto split;
  309. gaps |= bvec_seg_gap(bvprvp, &bv);
  310. }
  311. if (nsegs < lim->max_segments &&
  312. bytes + bv.bv_len <= max_bytes &&
  313. bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
  314. nsegs++;
  315. bytes += bv.bv_len;
  316. } else {
  317. if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
  318. lim->max_segments, max_bytes))
  319. goto split;
  320. }
  321. bvprv = bv;
  322. bvprvp = &bvprv;
  323. }
  324. *segs = nsegs;
  325. bio->bi_bvec_gap_bit = ffs(gaps);
  326. return 0;
  327. split:
  328. if (bio->bi_opf & REQ_ATOMIC)
  329. return -EINVAL;
  330. /*
  331. * We can't sanely support splitting for a REQ_NOWAIT bio. End it
  332. * with EAGAIN if splitting is required and return an error pointer.
  333. */
  334. if (bio->bi_opf & REQ_NOWAIT)
  335. return -EAGAIN;
  336. *segs = nsegs;
  337. /*
  338. * Individual bvecs might not be logical block aligned. Round down the
  339. * split size so that each bio is properly block size aligned, even if
  340. * we do not use the full hardware limits.
  341. *
  342. * It is possible to submit a bio that can't be split into a valid io:
  343. * there may either be too many discontiguous vectors for the max
  344. * segments limit, or contain virtual boundary gaps without having a
  345. * valid block sized split. A zero byte result means one of those
  346. * conditions occured.
  347. */
  348. bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
  349. if (!bytes)
  350. return -EINVAL;
  351. /*
  352. * Bio splitting may cause subtle trouble such as hang when doing sync
  353. * iopoll in direct IO routine. Given performance gain of iopoll for
  354. * big IO can be trival, disable iopoll when split needed.
  355. */
  356. bio_clear_polled(bio);
  357. bio->bi_bvec_gap_bit = ffs(gaps);
  358. return bytes >> SECTOR_SHIFT;
  359. }
  360. EXPORT_SYMBOL_GPL(bio_split_io_at);
  361. struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
  362. unsigned *nr_segs)
  363. {
  364. return bio_submit_split(bio,
  365. bio_split_rw_at(bio, lim, nr_segs,
  366. get_max_io_size(bio, lim) << SECTOR_SHIFT));
  367. }
  368. /*
  369. * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
  370. *
  371. * But we want the nr_segs calculation provided by bio_split_rw_at, and having
  372. * a good sanity check that the submitter built the bio correctly is nice to
  373. * have as well.
  374. */
  375. struct bio *bio_split_zone_append(struct bio *bio,
  376. const struct queue_limits *lim, unsigned *nr_segs)
  377. {
  378. int split_sectors;
  379. split_sectors = bio_split_rw_at(bio, lim, nr_segs,
  380. lim->max_zone_append_sectors << SECTOR_SHIFT);
  381. if (WARN_ON_ONCE(split_sectors > 0))
  382. split_sectors = -EINVAL;
  383. return bio_submit_split(bio, split_sectors);
  384. }
  /*
   * Split a write-zeroes @bio at the limit reported by get_max_io_size().
   * *@nsegs is always set to 0.
   */
  struct bio *bio_split_write_zeroes(struct bio *bio,
  		const struct queue_limits *lim, unsigned *nsegs)
  {
  	unsigned int max_sectors = get_max_io_size(bio, lim);

  	*nsegs = 0;

  	/*
  	 * An unset limit should normally not happen, as bio submission is
  	 * keyed off having a non-zero limit. But SCSI can clear the limit in
  	 * the I/O completion handler, and we can race and see this. Splitting
  	 * to a zero limit obviously doesn't make sense, so band-aid it here
  	 * together with the "already fits" case.
  	 */
  	if (!max_sectors || bio_sectors(bio) <= max_sectors)
  		return bio;

  	return bio_submit_split(bio, max_sectors);
  }
  402. /**
  403. * bio_split_to_limits - split a bio to fit the queue limits
  404. * @bio: bio to be split
  405. *
  406. * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
  407. * if so split off a bio fitting the limits from the beginning of @bio and
  408. * return it. @bio is shortened to the remainder and re-submitted.
  409. *
  410. * The split bio is allocated from @q->bio_split, which is provided by the
  411. * block layer.
  412. */
  413. struct bio *bio_split_to_limits(struct bio *bio)
  414. {
  415. unsigned int nr_segs;
  416. return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
  417. }
  418. EXPORT_SYMBOL(bio_split_to_limits);
  419. unsigned int blk_recalc_rq_segments(struct request *rq)
  420. {
  421. unsigned int nr_phys_segs = 0;
  422. unsigned int bytes = 0;
  423. struct req_iterator iter;
  424. struct bio_vec bv;
  425. if (!rq->bio)
  426. return 0;
  427. switch (bio_op(rq->bio)) {
  428. case REQ_OP_DISCARD:
  429. case REQ_OP_SECURE_ERASE:
  430. if (queue_max_discard_segments(rq->q) > 1) {
  431. struct bio *bio = rq->bio;
  432. for_each_bio(bio)
  433. nr_phys_segs++;
  434. return nr_phys_segs;
  435. }
  436. return 1;
  437. case REQ_OP_WRITE_ZEROES:
  438. return 0;
  439. default:
  440. break;
  441. }
  442. rq_for_each_bvec(bv, rq, iter)
  443. bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
  444. UINT_MAX, BIO_MAX_SIZE);
  445. return nr_phys_segs;
  446. }
  447. static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
  448. sector_t offset)
  449. {
  450. struct request_queue *q = rq->q;
  451. struct queue_limits *lim = &q->limits;
  452. unsigned int max_sectors, boundary_sectors;
  453. bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
  454. if (blk_rq_is_passthrough(rq))
  455. return q->limits.max_hw_sectors;
  456. boundary_sectors = blk_boundary_sectors(lim, is_atomic);
  457. max_sectors = blk_queue_get_max_sectors(rq);
  458. if (!boundary_sectors ||
  459. req_op(rq) == REQ_OP_DISCARD ||
  460. req_op(rq) == REQ_OP_SECURE_ERASE)
  461. return max_sectors;
  462. return min(max_sectors,
  463. blk_boundary_sectors_left(offset, boundary_sectors));
  464. }
  465. static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
  466. unsigned int nr_phys_segs)
  467. {
  468. if (!blk_cgroup_mergeable(req, bio))
  469. goto no_merge;
  470. if (blk_integrity_merge_bio(req->q, req, bio) == false)
  471. goto no_merge;
  472. /* discard request merge won't add new segment */
  473. if (req_op(req) == REQ_OP_DISCARD)
  474. return 1;
  475. if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
  476. goto no_merge;
  477. /*
  478. * This will form the start of a new hw segment. Bump both
  479. * counters.
  480. */
  481. req->nr_phys_segments += nr_phys_segs;
  482. if (bio_integrity(bio))
  483. req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
  484. bio);
  485. return 1;
  486. no_merge:
  487. req_set_nomerge(req->q, req);
  488. return 0;
  489. }
  490. int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
  491. {
  492. if (req_gap_back_merge(req, bio))
  493. return 0;
  494. if (blk_integrity_rq(req) &&
  495. integrity_req_gap_back_merge(req, bio))
  496. return 0;
  497. if (!bio_crypt_ctx_back_mergeable(req, bio))
  498. return 0;
  499. if (blk_rq_sectors(req) + bio_sectors(bio) >
  500. blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
  501. req_set_nomerge(req->q, req);
  502. return 0;
  503. }
  504. return ll_new_hw_segment(req, bio, nr_segs);
  505. }
  506. static int ll_front_merge_fn(struct request *req, struct bio *bio,
  507. unsigned int nr_segs)
  508. {
  509. if (req_gap_front_merge(req, bio))
  510. return 0;
  511. if (blk_integrity_rq(req) &&
  512. integrity_req_gap_front_merge(req, bio))
  513. return 0;
  514. if (!bio_crypt_ctx_front_mergeable(req, bio))
  515. return 0;
  516. if (blk_rq_sectors(req) + bio_sectors(bio) >
  517. blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
  518. req_set_nomerge(req->q, req);
  519. return 0;
  520. }
  521. return ll_new_hw_segment(req, bio, nr_segs);
  522. }
  523. static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
  524. struct request *next)
  525. {
  526. unsigned short segments = blk_rq_nr_discard_segments(req);
  527. if (segments >= queue_max_discard_segments(q))
  528. goto no_merge;
  529. if (blk_rq_sectors(req) + bio_sectors(next->bio) >
  530. blk_rq_get_max_sectors(req, blk_rq_pos(req)))
  531. goto no_merge;
  532. req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
  533. return true;
  534. no_merge:
  535. req_set_nomerge(q, req);
  536. return false;
  537. }
  538. static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
  539. struct request *next)
  540. {
  541. int total_phys_segments;
  542. if (req_gap_back_merge(req, next->bio))
  543. return 0;
  544. /*
  545. * Will it become too large?
  546. */
  547. if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
  548. blk_rq_get_max_sectors(req, blk_rq_pos(req)))
  549. return 0;
  550. total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
  551. if (total_phys_segments > blk_rq_get_max_segments(req))
  552. return 0;
  553. if (!blk_cgroup_mergeable(req, next->bio))
  554. return 0;
  555. if (blk_integrity_merge_rq(q, req, next) == false)
  556. return 0;
  557. if (!bio_crypt_ctx_merge_rq(req, next))
  558. return 0;
  559. /* Merge is OK... */
  560. req->nr_phys_segments = total_phys_segments;
  561. req->nr_integrity_segments += next->nr_integrity_segments;
  562. return 1;
  563. }
  564. /**
  565. * blk_rq_set_mixed_merge - mark a request as mixed merge
  566. * @rq: request to mark as mixed merge
  567. *
  568. * Description:
  569. * @rq is about to be mixed merged. Make sure the attributes
  570. * which can be mixed are set in each bio and mark @rq as mixed
  571. * merged.
  572. */
  573. static void blk_rq_set_mixed_merge(struct request *rq)
  574. {
  575. blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
  576. struct bio *bio;
  577. if (rq->rq_flags & RQF_MIXED_MERGE)
  578. return;
  579. /*
  580. * @rq will no longer represent mixable attributes for all the
  581. * contained bios. It will just track those of the first one.
  582. * Distributes the attributs to each bio.
  583. */
  584. for (bio = rq->bio; bio; bio = bio->bi_next) {
  585. WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
  586. (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
  587. bio->bi_opf |= ff;
  588. }
  589. rq->rq_flags |= RQF_MIXED_MERGE;
  590. }
  591. static inline blk_opf_t bio_failfast(const struct bio *bio)
  592. {
  593. if (bio->bi_opf & REQ_RAHEAD)
  594. return REQ_FAILFAST_MASK;
  595. return bio->bi_opf & REQ_FAILFAST_MASK;
  596. }
  597. /*
  598. * After we are marked as MIXED_MERGE, any new RA bio has to be updated
  599. * as failfast, and request's failfast has to be updated in case of
  600. * front merge.
  601. */
  602. static inline void blk_update_mixed_merge(struct request *req,
  603. struct bio *bio, bool front_merge)
  604. {
  605. if (req->rq_flags & RQF_MIXED_MERGE) {
  606. if (bio->bi_opf & REQ_RAHEAD)
  607. bio->bi_opf |= REQ_FAILFAST_MASK;
  608. if (front_merge) {
  609. req->cmd_flags &= ~REQ_FAILFAST_MASK;
  610. req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
  611. }
  612. }
  613. }
  614. static void blk_account_io_merge_request(struct request *req)
  615. {
  616. if (req->rq_flags & RQF_IO_STAT) {
  617. part_stat_lock();
  618. part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
  619. part_stat_local_dec(req->part,
  620. in_flight[op_is_write(req_op(req))]);
  621. part_stat_unlock();
  622. }
  623. }
  624. static enum elv_merge blk_try_req_merge(struct request *req,
  625. struct request *next)
  626. {
  627. if (blk_discard_mergable(req))
  628. return ELEVATOR_DISCARD_MERGE;
  629. else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
  630. return ELEVATOR_BACK_MERGE;
  631. return ELEVATOR_NO_MERGE;
  632. }
  633. static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
  634. struct bio *bio)
  635. {
  636. return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
  637. }
  638. static bool blk_atomic_write_mergeable_rqs(struct request *rq,
  639. struct request *next)
  640. {
  641. return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
  642. }
  643. u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
  644. u8 gaps_bit)
  645. {
  646. struct bio_vec pb, nb;
  647. if (!bio_has_data(prev))
  648. return 0;
  649. gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
  650. gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);
  651. bio_get_last_bvec(prev, &pb);
  652. bio_get_first_bvec(next, &nb);
  653. if (!biovec_phys_mergeable(q, &pb, &nb))
  654. gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&pb, &nb)));
  655. return gaps_bit;
  656. }
  657. /*
  658. * For non-mq, this has to be called with the request spinlock acquired.
  659. * For mq with scheduling, the appropriate queue wide lock should be held.
  660. */
  661. static struct request *attempt_merge(struct request_queue *q,
  662. struct request *req, struct request *next)
  663. {
  664. if (!rq_mergeable(req) || !rq_mergeable(next))
  665. return NULL;
  666. if (req_op(req) != req_op(next))
  667. return NULL;
  668. if (req->bio->bi_write_hint != next->bio->bi_write_hint)
  669. return NULL;
  670. if (req->bio->bi_write_stream != next->bio->bi_write_stream)
  671. return NULL;
  672. if (req->bio->bi_ioprio != next->bio->bi_ioprio)
  673. return NULL;
  674. if (!blk_atomic_write_mergeable_rqs(req, next))
  675. return NULL;
  676. /*
  677. * If we are allowed to merge, then append bio list
  678. * from next to rq and release next. merge_requests_fn
  679. * will have updated segment counts, update sector
  680. * counts here. Handle DISCARDs separately, as they
  681. * have separate settings.
  682. */
  683. switch (blk_try_req_merge(req, next)) {
  684. case ELEVATOR_DISCARD_MERGE:
  685. if (!req_attempt_discard_merge(q, req, next))
  686. return NULL;
  687. break;
  688. case ELEVATOR_BACK_MERGE:
  689. if (!ll_merge_requests_fn(q, req, next))
  690. return NULL;
  691. break;
  692. default:
  693. return NULL;
  694. }
  695. /*
  696. * If failfast settings disagree or any of the two is already
  697. * a mixed merge, mark both as mixed before proceeding. This
  698. * makes sure that all involved bios have mixable attributes
  699. * set properly.
  700. */
  701. if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
  702. (req->cmd_flags & REQ_FAILFAST_MASK) !=
  703. (next->cmd_flags & REQ_FAILFAST_MASK)) {
  704. blk_rq_set_mixed_merge(req);
  705. blk_rq_set_mixed_merge(next);
  706. }
  707. /*
  708. * At this point we have either done a back merge or front merge. We
  709. * need the smaller start_time_ns of the merged requests to be the
  710. * current request for accounting purposes.
  711. */
  712. if (next->start_time_ns < req->start_time_ns)
  713. req->start_time_ns = next->start_time_ns;
  714. req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
  715. min_not_zero(next->phys_gap_bit,
  716. req->phys_gap_bit));
  717. req->biotail->bi_next = next->bio;
  718. req->biotail = next->biotail;
  719. req->__data_len += blk_rq_bytes(next);
  720. if (!blk_discard_mergable(req))
  721. elv_merge_requests(q, req, next);
  722. blk_crypto_rq_put_keyslot(next);
  723. /*
  724. * 'next' is going away, so update stats accordingly
  725. */
  726. blk_account_io_merge_request(next);
  727. trace_block_rq_merge(next);
  728. /*
  729. * ownership of bio passed from next to req, return 'next' for
  730. * the caller to free
  731. */
  732. next->bio = NULL;
  733. return next;
  734. }
  735. static struct request *attempt_back_merge(struct request_queue *q,
  736. struct request *rq)
  737. {
  738. struct request *next = elv_latter_request(q, rq);
  739. if (next)
  740. return attempt_merge(q, rq, next);
  741. return NULL;
  742. }
  743. static struct request *attempt_front_merge(struct request_queue *q,
  744. struct request *rq)
  745. {
  746. struct request *prev = elv_former_request(q, rq);
  747. if (prev)
  748. return attempt_merge(q, prev, rq);
  749. return NULL;
  750. }
  751. /*
  752. * Try to merge 'next' into 'rq'. Return true if the merge happened, false
  753. * otherwise. The caller is responsible for freeing 'next' if the merge
  754. * happened.
  755. */
  756. bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
  757. struct request *next)
  758. {
  759. return attempt_merge(q, rq, next);
  760. }
  761. bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
  762. {
  763. if (!rq_mergeable(rq) || !bio_mergeable(bio))
  764. return false;
  765. if (req_op(rq) != bio_op(bio))
  766. return false;
  767. if (!blk_cgroup_mergeable(rq, bio))
  768. return false;
  769. if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
  770. return false;
  771. if (!bio_crypt_rq_ctx_compatible(rq, bio))
  772. return false;
  773. if (rq->bio->bi_write_hint != bio->bi_write_hint)
  774. return false;
  775. if (rq->bio->bi_write_stream != bio->bi_write_stream)
  776. return false;
  777. if (rq->bio->bi_ioprio != bio->bi_ioprio)
  778. return false;
  779. if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
  780. return false;
  781. return true;
  782. }
  783. enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
  784. {
  785. if (blk_discard_mergable(rq))
  786. return ELEVATOR_DISCARD_MERGE;
  787. else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
  788. return ELEVATOR_BACK_MERGE;
  789. else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
  790. return ELEVATOR_FRONT_MERGE;
  791. return ELEVATOR_NO_MERGE;
  792. }
  793. static void blk_account_io_merge_bio(struct request *req)
  794. {
  795. if (req->rq_flags & RQF_IO_STAT) {
  796. part_stat_lock();
  797. part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
  798. part_stat_unlock();
  799. }
  800. }
  801. enum bio_merge_status bio_attempt_back_merge(struct request *req,
  802. struct bio *bio, unsigned int nr_segs)
  803. {
  804. const blk_opf_t ff = bio_failfast(bio);
  805. if (!ll_back_merge_fn(req, bio, nr_segs))
  806. return BIO_MERGE_FAILED;
  807. trace_block_bio_backmerge(bio);
  808. rq_qos_merge(req->q, req, bio);
  809. if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
  810. blk_rq_set_mixed_merge(req);
  811. blk_update_mixed_merge(req, bio, false);
  812. if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
  813. blk_zone_write_plug_bio_merged(bio);
  814. req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
  815. req->phys_gap_bit);
  816. req->biotail->bi_next = bio;
  817. req->biotail = bio;
  818. req->__data_len += bio->bi_iter.bi_size;
  819. bio_crypt_free_ctx(bio);
  820. blk_account_io_merge_bio(req);
  821. return BIO_MERGE_OK;
  822. }
  823. static enum bio_merge_status bio_attempt_front_merge(struct request *req,
  824. struct bio *bio, unsigned int nr_segs)
  825. {
  826. const blk_opf_t ff = bio_failfast(bio);
  827. /*
  828. * A front merge for writes to sequential zones of a zoned block device
  829. * can happen only if the user submitted writes out of order. Do not
  830. * merge such write to let it fail.
  831. */
  832. if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
  833. return BIO_MERGE_FAILED;
  834. if (!ll_front_merge_fn(req, bio, nr_segs))
  835. return BIO_MERGE_FAILED;
  836. trace_block_bio_frontmerge(bio);
  837. rq_qos_merge(req->q, req, bio);
  838. if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
  839. blk_rq_set_mixed_merge(req);
  840. blk_update_mixed_merge(req, bio, true);
  841. req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
  842. req->phys_gap_bit);
  843. bio->bi_next = req->bio;
  844. req->bio = bio;
  845. req->__sector = bio->bi_iter.bi_sector;
  846. req->__data_len += bio->bi_iter.bi_size;
  847. bio_crypt_do_front_merge(req, bio);
  848. blk_account_io_merge_bio(req);
  849. return BIO_MERGE_OK;
  850. }
  851. static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
  852. struct request *req, struct bio *bio)
  853. {
  854. unsigned short segments = blk_rq_nr_discard_segments(req);
  855. if (segments >= queue_max_discard_segments(q))
  856. goto no_merge;
  857. if (blk_rq_sectors(req) + bio_sectors(bio) >
  858. blk_rq_get_max_sectors(req, blk_rq_pos(req)))
  859. goto no_merge;
  860. rq_qos_merge(q, req, bio);
  861. req->biotail->bi_next = bio;
  862. req->biotail = bio;
  863. req->__data_len += bio->bi_iter.bi_size;
  864. req->nr_phys_segments = segments + 1;
  865. blk_account_io_merge_bio(req);
  866. return BIO_MERGE_OK;
  867. no_merge:
  868. req_set_nomerge(q, req);
  869. return BIO_MERGE_FAILED;
  870. }
  871. static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
  872. struct request *rq,
  873. struct bio *bio,
  874. unsigned int nr_segs,
  875. bool sched_allow_merge)
  876. {
  877. if (!blk_rq_merge_ok(rq, bio))
  878. return BIO_MERGE_NONE;
  879. switch (blk_try_merge(rq, bio)) {
  880. case ELEVATOR_BACK_MERGE:
  881. if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
  882. return bio_attempt_back_merge(rq, bio, nr_segs);
  883. break;
  884. case ELEVATOR_FRONT_MERGE:
  885. if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
  886. return bio_attempt_front_merge(rq, bio, nr_segs);
  887. break;
  888. case ELEVATOR_DISCARD_MERGE:
  889. return bio_attempt_discard_merge(q, rq, bio);
  890. default:
  891. return BIO_MERGE_NONE;
  892. }
  893. return BIO_MERGE_FAILED;
  894. }
  895. /**
  896. * blk_attempt_plug_merge - try to merge with %current's plugged list
  897. * @q: request_queue new bio is being queued at
  898. * @bio: new bio being queued
  899. * @nr_segs: number of segments in @bio
  900. * from the passed in @q already in the plug list
  901. *
  902. * Determine whether @bio being queued on @q can be merged with the previous
  903. * request on %current's plugged list. Returns %true if merge was successful,
  904. * otherwise %false.
  905. *
  906. * Plugging coalesces IOs from the same issuer for the same purpose without
  907. * going through @q->queue_lock. As such it's more of an issuing mechanism
  908. * than scheduling, and the request, while may have elvpriv data, is not
  909. * added on the elevator at this point. In addition, we don't have
  910. * reliable access to the elevator outside queue lock. Only check basic
  911. * merging parameters without querying the elevator.
  912. *
  913. * Caller must ensure !blk_queue_nomerges(q) beforehand.
  914. */
  915. bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
  916. unsigned int nr_segs)
  917. {
  918. struct blk_plug *plug = current->plug;
  919. struct request *rq;
  920. if (!plug || rq_list_empty(&plug->mq_list))
  921. return false;
  922. rq = plug->mq_list.tail;
  923. if (rq->q == q)
  924. return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
  925. BIO_MERGE_OK;
  926. else if (!plug->multiple_queues)
  927. return false;
  928. rq_list_for_each(&plug->mq_list, rq) {
  929. if (rq->q != q)
  930. continue;
  931. if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
  932. BIO_MERGE_OK)
  933. return true;
  934. break;
  935. }
  936. return false;
  937. }
  938. /*
  939. * Iterate list of requests and see if we can merge this bio with any
  940. * of them.
  941. */
  942. bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
  943. struct bio *bio, unsigned int nr_segs)
  944. {
  945. struct request *rq;
  946. int checked = 8;
  947. list_for_each_entry_reverse(rq, list, queuelist) {
  948. if (!checked--)
  949. break;
  950. switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
  951. case BIO_MERGE_NONE:
  952. continue;
  953. case BIO_MERGE_OK:
  954. return true;
  955. case BIO_MERGE_FAILED:
  956. return false;
  957. }
  958. }
  959. return false;
  960. }
  961. EXPORT_SYMBOL_GPL(blk_bio_list_merge);
  962. bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
  963. unsigned int nr_segs, struct request **merged_request)
  964. {
  965. struct request *rq;
  966. switch (elv_merge(q, &rq, bio)) {
  967. case ELEVATOR_BACK_MERGE:
  968. if (!blk_mq_sched_allow_merge(q, rq, bio))
  969. return false;
  970. if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
  971. return false;
  972. *merged_request = attempt_back_merge(q, rq);
  973. if (!*merged_request)
  974. elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
  975. return true;
  976. case ELEVATOR_FRONT_MERGE:
  977. if (!blk_mq_sched_allow_merge(q, rq, bio))
  978. return false;
  979. if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
  980. return false;
  981. *merged_request = attempt_front_merge(q, rq);
  982. if (!*merged_request)
  983. elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
  984. return true;
  985. case ELEVATOR_DISCARD_MERGE:
  986. return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
  987. default:
  988. return false;
  989. }
  990. }
  991. EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);