mq-deadline.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
  4. * for the blk-mq scheduling framework
  5. *
  6. * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
  7. */
  8. #include <linux/kernel.h>
  9. #include <linux/fs.h>
  10. #include <linux/blkdev.h>
  11. #include <linux/bio.h>
  12. #include <linux/module.h>
  13. #include <linux/slab.h>
  14. #include <linux/init.h>
  15. #include <linux/compiler.h>
  16. #include <linux/rbtree.h>
  17. #include <linux/sbitmap.h>
  18. #include <trace/events/block.h>
  19. #include "elevator.h"
  20. #include "blk.h"
  21. #include "blk-mq.h"
  22. #include "blk-mq-debugfs.h"
  23. #include "blk-mq-sched.h"
  24. /*
  25. * See Documentation/block/deadline-iosched.rst
  26. */
  27. static const int read_expire = HZ / 2; /* max time before a read is submitted. */
  28. static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
  29. /*
  30. * Time after which to dispatch lower priority requests even if higher
  31. * priority requests are pending.
  32. */
  33. static const int prio_aging_expire = 10 * HZ;
  34. static const int writes_starved = 2; /* max times reads can starve a write */
  35. static const int fifo_batch = 16; /* # of sequential requests treated as one
  36. by the above parameters. For throughput. */
  37. enum dd_data_dir {
  38. DD_READ = READ,
  39. DD_WRITE = WRITE,
  40. };
  41. enum { DD_DIR_COUNT = 2 };
  42. enum dd_prio {
  43. DD_RT_PRIO = 0,
  44. DD_BE_PRIO = 1,
  45. DD_IDLE_PRIO = 2,
  46. DD_PRIO_MAX = 2,
  47. };
  48. enum { DD_PRIO_COUNT = 3 };
  49. /*
  50. * I/O statistics per I/O priority. It is fine if these counters overflow.
  51. * What matters is that these counters are at least as wide as
  52. * log2(max_outstanding_requests).
  53. */
  54. struct io_stats_per_prio {
  55. uint32_t inserted;
  56. uint32_t merged;
  57. uint32_t dispatched;
  58. atomic_t completed;
  59. };
  60. /*
  61. * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
  62. * present on both sort_list[] and fifo_list[].
  63. */
  64. struct dd_per_prio {
  65. struct rb_root sort_list[DD_DIR_COUNT];
  66. struct list_head fifo_list[DD_DIR_COUNT];
  67. /* Position of the most recently dispatched request. */
  68. sector_t latest_pos[DD_DIR_COUNT];
  69. struct io_stats_per_prio stats;
  70. };
  71. struct deadline_data {
  72. /*
  73. * run time data
  74. */
  75. struct list_head dispatch;
  76. struct dd_per_prio per_prio[DD_PRIO_COUNT];
  77. /* Data direction of latest dispatched request. */
  78. enum dd_data_dir last_dir;
  79. unsigned int batching; /* number of sequential requests made */
  80. unsigned int starved; /* times reads have starved writes */
  81. /*
  82. * settings that change how the i/o scheduler behaves
  83. */
  84. int fifo_expire[DD_DIR_COUNT];
  85. int fifo_batch;
  86. int writes_starved;
  87. int front_merges;
  88. int prio_aging_expire;
  89. spinlock_t lock;
  90. };
  91. /* Maps an I/O priority class to a deadline scheduler priority. */
  92. static const enum dd_prio ioprio_class_to_prio[] = {
  93. [IOPRIO_CLASS_NONE] = DD_BE_PRIO,
  94. [IOPRIO_CLASS_RT] = DD_RT_PRIO,
  95. [IOPRIO_CLASS_BE] = DD_BE_PRIO,
  96. [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
  97. };
  98. static inline struct rb_root *
  99. deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
  100. {
  101. return &per_prio->sort_list[rq_data_dir(rq)];
  102. }
  103. /*
  104. * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
  105. * request.
  106. */
  107. static u8 dd_rq_ioclass(struct request *rq)
  108. {
  109. return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
  110. }
  111. /*
  112. * Return the first request for which blk_rq_pos() >= @pos.
  113. */
  114. static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
  115. enum dd_data_dir data_dir, sector_t pos)
  116. {
  117. struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
  118. struct request *rq, *res = NULL;
  119. while (node) {
  120. rq = rb_entry_rq(node);
  121. if (blk_rq_pos(rq) >= pos) {
  122. res = rq;
  123. node = node->rb_left;
  124. } else {
  125. node = node->rb_right;
  126. }
  127. }
  128. return res;
  129. }
  130. static void
  131. deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
  132. {
  133. struct rb_root *root = deadline_rb_root(per_prio, rq);
  134. elv_rb_add(root, rq);
  135. }
  136. static inline void
  137. deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
  138. {
  139. elv_rb_del(deadline_rb_root(per_prio, rq), rq);
  140. }
  141. /*
  142. * remove rq from rbtree and fifo.
  143. */
  144. static void deadline_remove_request(struct request_queue *q,
  145. struct dd_per_prio *per_prio,
  146. struct request *rq)
  147. {
  148. list_del_init(&rq->queuelist);
  149. /*
  150. * We might not be on the rbtree, if we are doing an insert merge
  151. */
  152. if (!RB_EMPTY_NODE(&rq->rb_node))
  153. deadline_del_rq_rb(per_prio, rq);
  154. elv_rqhash_del(q, rq);
  155. if (q->last_merge == rq)
  156. q->last_merge = NULL;
  157. }
  158. static void dd_request_merged(struct request_queue *q, struct request *req,
  159. enum elv_merge type)
  160. {
  161. struct deadline_data *dd = q->elevator->elevator_data;
  162. const u8 ioprio_class = dd_rq_ioclass(req);
  163. const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
  164. struct dd_per_prio *per_prio = &dd->per_prio[prio];
  165. /*
  166. * if the merge was a front merge, we need to reposition request
  167. */
  168. if (type == ELEVATOR_FRONT_MERGE) {
  169. elv_rb_del(deadline_rb_root(per_prio, req), req);
  170. deadline_add_rq_rb(per_prio, req);
  171. }
  172. }
  173. /*
  174. * Callback function that is invoked after @next has been merged into @req.
  175. */
  176. static void dd_merged_requests(struct request_queue *q, struct request *req,
  177. struct request *next)
  178. {
  179. struct deadline_data *dd = q->elevator->elevator_data;
  180. const u8 ioprio_class = dd_rq_ioclass(next);
  181. const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
  182. lockdep_assert_held(&dd->lock);
  183. dd->per_prio[prio].stats.merged++;
  184. /*
  185. * if next expires before rq, assign its expire time to rq
  186. * and move into next position (next will be deleted) in fifo
  187. */
  188. if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
  189. if (time_before((unsigned long)next->fifo_time,
  190. (unsigned long)req->fifo_time)) {
  191. list_move(&req->queuelist, &next->queuelist);
  192. req->fifo_time = next->fifo_time;
  193. }
  194. }
  195. /*
  196. * kill knowledge of next, this one is a goner
  197. */
  198. deadline_remove_request(q, &dd->per_prio[prio], next);
  199. }
  200. /*
  201. * move an entry to dispatch queue
  202. */
  203. static void
  204. deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
  205. struct request *rq)
  206. {
  207. /*
  208. * take it off the sort and fifo list
  209. */
  210. deadline_remove_request(rq->q, per_prio, rq);
  211. }
  212. /* Number of requests queued for a given priority level. */
  213. static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
  214. {
  215. const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
  216. lockdep_assert_held(&dd->lock);
  217. return stats->inserted - atomic_read(&stats->completed);
  218. }
  219. /*
  220. * deadline_check_fifo returns true if and only if there are expired requests
  221. * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
  222. */
  223. static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
  224. enum dd_data_dir data_dir)
  225. {
  226. struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
  227. return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
  228. }
  229. /*
  230. * For the specified data direction, return the next request to
  231. * dispatch using arrival ordered lists.
  232. */
  233. static struct request *
  234. deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
  235. enum dd_data_dir data_dir)
  236. {
  237. if (list_empty(&per_prio->fifo_list[data_dir]))
  238. return NULL;
  239. return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
  240. }
  241. /*
  242. * For the specified data direction, return the next request to
  243. * dispatch using sector position sorted lists.
  244. */
  245. static struct request *
  246. deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
  247. enum dd_data_dir data_dir)
  248. {
  249. return deadline_from_pos(per_prio, data_dir,
  250. per_prio->latest_pos[data_dir]);
  251. }
  252. /*
  253. * Returns true if and only if @rq started after @latest_start where
  254. * @latest_start is in jiffies.
  255. */
  256. static bool started_after(struct deadline_data *dd, struct request *rq,
  257. unsigned long latest_start)
  258. {
  259. unsigned long start_time = (unsigned long)rq->fifo_time;
  260. start_time -= dd->fifo_expire[rq_data_dir(rq)];
  261. return time_after(start_time, latest_start);
  262. }
  263. static struct request *dd_start_request(struct deadline_data *dd,
  264. enum dd_data_dir data_dir,
  265. struct request *rq)
  266. {
  267. u8 ioprio_class = dd_rq_ioclass(rq);
  268. enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
  269. dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
  270. dd->per_prio[prio].stats.dispatched++;
  271. rq->rq_flags |= RQF_STARTED;
  272. return rq;
  273. }
  274. /*
  275. * deadline_dispatch_requests selects the best request according to
  276. * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
  277. */
  278. static struct request *__dd_dispatch_request(struct deadline_data *dd,
  279. struct dd_per_prio *per_prio,
  280. unsigned long latest_start)
  281. {
  282. struct request *rq, *next_rq;
  283. enum dd_data_dir data_dir;
  284. lockdep_assert_held(&dd->lock);
  285. /*
  286. * batches are currently reads XOR writes
  287. */
  288. rq = deadline_next_request(dd, per_prio, dd->last_dir);
  289. if (rq && dd->batching < dd->fifo_batch) {
  290. /* we have a next request and are still entitled to batch */
  291. data_dir = rq_data_dir(rq);
  292. goto dispatch_request;
  293. }
  294. /*
  295. * at this point we are not running a batch. select the appropriate
  296. * data direction (read / write)
  297. */
  298. if (!list_empty(&per_prio->fifo_list[DD_READ])) {
  299. BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
  300. if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
  301. (dd->starved++ >= dd->writes_starved))
  302. goto dispatch_writes;
  303. data_dir = DD_READ;
  304. goto dispatch_find_request;
  305. }
  306. /*
  307. * there are either no reads or writes have been starved
  308. */
  309. if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
  310. dispatch_writes:
  311. BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
  312. dd->starved = 0;
  313. data_dir = DD_WRITE;
  314. goto dispatch_find_request;
  315. }
  316. return NULL;
  317. dispatch_find_request:
  318. /*
  319. * we are not running a batch, find best request for selected data_dir
  320. */
  321. next_rq = deadline_next_request(dd, per_prio, data_dir);
  322. if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
  323. /*
  324. * A deadline has expired, the last request was in the other
  325. * direction, or we have run out of higher-sectored requests.
  326. * Start again from the request with the earliest expiry time.
  327. */
  328. rq = deadline_fifo_request(dd, per_prio, data_dir);
  329. } else {
  330. /*
  331. * The last req was the same dir and we have a next request in
  332. * sort order. No expired requests so continue on from here.
  333. */
  334. rq = next_rq;
  335. }
  336. if (!rq)
  337. return NULL;
  338. dd->last_dir = data_dir;
  339. dd->batching = 0;
  340. dispatch_request:
  341. if (started_after(dd, rq, latest_start))
  342. return NULL;
  343. /*
  344. * rq is the selected appropriate request.
  345. */
  346. dd->batching++;
  347. deadline_move_request(dd, per_prio, rq);
  348. return dd_start_request(dd, data_dir, rq);
  349. }
  350. /*
  351. * Check whether there are any requests with priority other than DD_RT_PRIO
  352. * that were inserted more than prio_aging_expire jiffies ago.
  353. */
  354. static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
  355. unsigned long now)
  356. {
  357. struct request *rq;
  358. enum dd_prio prio;
  359. int prio_cnt;
  360. lockdep_assert_held(&dd->lock);
  361. prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
  362. !!dd_queued(dd, DD_IDLE_PRIO);
  363. if (prio_cnt < 2)
  364. return NULL;
  365. for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
  366. rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
  367. now - dd->prio_aging_expire);
  368. if (rq)
  369. return rq;
  370. }
  371. return NULL;
  372. }
  373. /*
  374. * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
  375. *
  376. * One confusing aspect here is that we get called for a specific
  377. * hardware queue, but we may return a request that is for a
  378. * different hardware queue. This is because mq-deadline has shared
  379. * state for all hardware queues, in terms of sorting, FIFOs, etc.
  380. */
  381. static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
  382. {
  383. struct deadline_data *dd = hctx->queue->elevator->elevator_data;
  384. const unsigned long now = jiffies;
  385. struct request *rq;
  386. enum dd_prio prio;
  387. spin_lock(&dd->lock);
  388. if (!list_empty(&dd->dispatch)) {
  389. rq = list_first_entry(&dd->dispatch, struct request, queuelist);
  390. list_del_init(&rq->queuelist);
  391. dd_start_request(dd, rq_data_dir(rq), rq);
  392. goto unlock;
  393. }
  394. rq = dd_dispatch_prio_aged_requests(dd, now);
  395. if (rq)
  396. goto unlock;
  397. /*
  398. * Next, dispatch requests in priority order. Ignore lower priority
  399. * requests if any higher priority requests are pending.
  400. */
  401. for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
  402. rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
  403. if (rq || dd_queued(dd, prio))
  404. break;
  405. }
  406. unlock:
  407. spin_unlock(&dd->lock);
  408. return rq;
  409. }
  410. static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
  411. {
  412. if (!blk_mq_is_sync_read(opf))
  413. data->shallow_depth = data->q->async_depth;
  414. }
  415. /* Called by blk_mq_init_sched() and blk_mq_update_nr_requests(). */
  416. static void dd_depth_updated(struct request_queue *q)
  417. {
  418. blk_mq_set_min_shallow_depth(q, q->async_depth);
  419. }
  420. static void dd_exit_sched(struct elevator_queue *e)
  421. {
  422. struct deadline_data *dd = e->elevator_data;
  423. enum dd_prio prio;
  424. for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
  425. struct dd_per_prio *per_prio = &dd->per_prio[prio];
  426. const struct io_stats_per_prio *stats = &per_prio->stats;
  427. uint32_t queued;
  428. WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
  429. WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
  430. spin_lock(&dd->lock);
  431. queued = dd_queued(dd, prio);
  432. spin_unlock(&dd->lock);
  433. WARN_ONCE(queued != 0,
  434. "statistics for priority %d: i %u m %u d %u c %u\n",
  435. prio, stats->inserted, stats->merged,
  436. stats->dispatched, atomic_read(&stats->completed));
  437. }
  438. kfree(dd);
  439. }
  440. /*
  441. * initialize elevator private data (deadline_data).
  442. */
  443. static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
  444. {
  445. struct deadline_data *dd;
  446. enum dd_prio prio;
  447. dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
  448. if (!dd)
  449. return -ENOMEM;
  450. eq->elevator_data = dd;
  451. INIT_LIST_HEAD(&dd->dispatch);
  452. for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
  453. struct dd_per_prio *per_prio = &dd->per_prio[prio];
  454. INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
  455. INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
  456. per_prio->sort_list[DD_READ] = RB_ROOT;
  457. per_prio->sort_list[DD_WRITE] = RB_ROOT;
  458. }
  459. dd->fifo_expire[DD_READ] = read_expire;
  460. dd->fifo_expire[DD_WRITE] = write_expire;
  461. dd->writes_starved = writes_starved;
  462. dd->front_merges = 1;
  463. dd->last_dir = DD_WRITE;
  464. dd->fifo_batch = fifo_batch;
  465. dd->prio_aging_expire = prio_aging_expire;
  466. spin_lock_init(&dd->lock);
  467. /* We dispatch from request queue wide instead of hw queue */
  468. blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
  469. q->elevator = eq;
  470. q->async_depth = q->nr_requests;
  471. dd_depth_updated(q);
  472. return 0;
  473. }
  474. /*
  475. * Try to merge @bio into an existing request. If @bio has been merged into
  476. * an existing request, store the pointer to that request into *@rq.
  477. */
  478. static int dd_request_merge(struct request_queue *q, struct request **rq,
  479. struct bio *bio)
  480. {
  481. struct deadline_data *dd = q->elevator->elevator_data;
  482. const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
  483. const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
  484. struct dd_per_prio *per_prio = &dd->per_prio[prio];
  485. sector_t sector = bio_end_sector(bio);
  486. struct request *__rq;
  487. if (!dd->front_merges)
  488. return ELEVATOR_NO_MERGE;
  489. __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
  490. if (__rq) {
  491. BUG_ON(sector != blk_rq_pos(__rq));
  492. if (elv_bio_merge_ok(__rq, bio)) {
  493. *rq = __rq;
  494. if (blk_discard_mergable(__rq))
  495. return ELEVATOR_DISCARD_MERGE;
  496. return ELEVATOR_FRONT_MERGE;
  497. }
  498. }
  499. return ELEVATOR_NO_MERGE;
  500. }
  501. /*
  502. * Attempt to merge a bio into an existing request. This function is called
  503. * before @bio is associated with a request.
  504. */
  505. static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
  506. unsigned int nr_segs)
  507. {
  508. struct deadline_data *dd = q->elevator->elevator_data;
  509. struct request *free = NULL;
  510. bool ret;
  511. spin_lock(&dd->lock);
  512. ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
  513. spin_unlock(&dd->lock);
  514. if (free)
  515. blk_mq_free_request(free);
  516. return ret;
  517. }
  518. /*
  519. * add rq to rbtree and fifo
  520. */
  521. static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
  522. blk_insert_t flags, struct list_head *free)
  523. {
  524. struct request_queue *q = hctx->queue;
  525. struct deadline_data *dd = q->elevator->elevator_data;
  526. const enum dd_data_dir data_dir = rq_data_dir(rq);
  527. u16 ioprio = req_get_ioprio(rq);
  528. u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
  529. struct dd_per_prio *per_prio;
  530. enum dd_prio prio;
  531. lockdep_assert_held(&dd->lock);
  532. prio = ioprio_class_to_prio[ioprio_class];
  533. per_prio = &dd->per_prio[prio];
  534. if (!rq->elv.priv[0])
  535. per_prio->stats.inserted++;
  536. rq->elv.priv[0] = per_prio;
  537. if (blk_mq_sched_try_insert_merge(q, rq, free))
  538. return;
  539. trace_block_rq_insert(rq);
  540. if (flags & BLK_MQ_INSERT_AT_HEAD) {
  541. list_add(&rq->queuelist, &dd->dispatch);
  542. rq->fifo_time = jiffies;
  543. } else {
  544. deadline_add_rq_rb(per_prio, rq);
  545. if (rq_mergeable(rq)) {
  546. elv_rqhash_add(q, rq);
  547. if (!q->last_merge)
  548. q->last_merge = rq;
  549. }
  550. /*
  551. * set expire time and add to fifo list
  552. */
  553. rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
  554. list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
  555. }
  556. }
  557. /*
  558. * Called from blk_mq_insert_request() or blk_mq_dispatch_list().
  559. */
  560. static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
  561. struct list_head *list,
  562. blk_insert_t flags)
  563. {
  564. struct request_queue *q = hctx->queue;
  565. struct deadline_data *dd = q->elevator->elevator_data;
  566. LIST_HEAD(free);
  567. spin_lock(&dd->lock);
  568. while (!list_empty(list)) {
  569. struct request *rq;
  570. rq = list_first_entry(list, struct request, queuelist);
  571. list_del_init(&rq->queuelist);
  572. dd_insert_request(hctx, rq, flags, &free);
  573. }
  574. spin_unlock(&dd->lock);
  575. blk_mq_free_requests(&free);
  576. }
  577. /* Callback from inside blk_mq_rq_ctx_init(). */
  578. static void dd_prepare_request(struct request *rq)
  579. {
  580. rq->elv.priv[0] = NULL;
  581. }
  582. /*
  583. * Callback from inside blk_mq_free_request().
  584. */
  585. static void dd_finish_request(struct request *rq)
  586. {
  587. struct dd_per_prio *per_prio = rq->elv.priv[0];
  588. /*
  589. * The block layer core may call dd_finish_request() without having
  590. * called dd_insert_requests(). Skip requests that bypassed I/O
  591. * scheduling. See also blk_mq_request_bypass_insert().
  592. */
  593. if (per_prio)
  594. atomic_inc(&per_prio->stats.completed);
  595. }
  596. static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
  597. {
  598. return !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
  599. !list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
  600. }
  601. static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
  602. {
  603. struct deadline_data *dd = hctx->queue->elevator->elevator_data;
  604. enum dd_prio prio;
  605. if (!list_empty_careful(&dd->dispatch))
  606. return true;
  607. for (prio = 0; prio <= DD_PRIO_MAX; prio++)
  608. if (dd_has_work_for_prio(&dd->per_prio[prio]))
  609. return true;
  610. return false;
  611. }
  612. /*
  613. * sysfs parts below
  614. */
  615. #define SHOW_INT(__FUNC, __VAR) \
  616. static ssize_t __FUNC(struct elevator_queue *e, char *page) \
  617. { \
  618. struct deadline_data *dd = e->elevator_data; \
  619. \
  620. return sysfs_emit(page, "%d\n", __VAR); \
  621. }
  622. #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
  623. SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
  624. SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
  625. SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
  626. SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
  627. SHOW_INT(deadline_front_merges_show, dd->front_merges);
  628. SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
  629. #undef SHOW_INT
  630. #undef SHOW_JIFFIES
  631. #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
  632. static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
  633. { \
  634. struct deadline_data *dd = e->elevator_data; \
  635. int __data, __ret; \
  636. \
  637. __ret = kstrtoint(page, 0, &__data); \
  638. if (__ret < 0) \
  639. return __ret; \
  640. if (__data < (MIN)) \
  641. __data = (MIN); \
  642. else if (__data > (MAX)) \
  643. __data = (MAX); \
  644. *(__PTR) = __CONV(__data); \
  645. return count; \
  646. }
  647. #define STORE_INT(__FUNC, __PTR, MIN, MAX) \
  648. STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
  649. #define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX) \
  650. STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
  651. STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
  652. STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
  653. STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
  654. STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
  655. STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
  656. STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
  657. #undef STORE_FUNCTION
  658. #undef STORE_INT
  659. #undef STORE_JIFFIES
  660. #define DD_ATTR(name) \
  661. __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
  662. static const struct elv_fs_entry deadline_attrs[] = {
  663. DD_ATTR(read_expire),
  664. DD_ATTR(write_expire),
  665. DD_ATTR(writes_starved),
  666. DD_ATTR(front_merges),
  667. DD_ATTR(fifo_batch),
  668. DD_ATTR(prio_aging_expire),
  669. __ATTR_NULL
  670. };
  671. #ifdef CONFIG_BLK_DEBUG_FS
  672. #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name) \
  673. static void *deadline_##name##_fifo_start(struct seq_file *m, \
  674. loff_t *pos) \
  675. __acquires(&dd->lock) \
  676. { \
  677. struct request_queue *q = m->private; \
  678. struct deadline_data *dd = q->elevator->elevator_data; \
  679. struct dd_per_prio *per_prio = &dd->per_prio[prio]; \
  680. \
  681. spin_lock(&dd->lock); \
  682. return seq_list_start(&per_prio->fifo_list[data_dir], *pos); \
  683. } \
  684. \
  685. static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \
  686. loff_t *pos) \
  687. { \
  688. struct request_queue *q = m->private; \
  689. struct deadline_data *dd = q->elevator->elevator_data; \
  690. struct dd_per_prio *per_prio = &dd->per_prio[prio]; \
  691. \
  692. return seq_list_next(v, &per_prio->fifo_list[data_dir], pos); \
  693. } \
  694. \
  695. static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \
  696. __releases(&dd->lock) \
  697. { \
  698. struct request_queue *q = m->private; \
  699. struct deadline_data *dd = q->elevator->elevator_data; \
  700. \
  701. spin_unlock(&dd->lock); \
  702. } \
  703. \
  704. static const struct seq_operations deadline_##name##_fifo_seq_ops = { \
  705. .start = deadline_##name##_fifo_start, \
  706. .next = deadline_##name##_fifo_next, \
  707. .stop = deadline_##name##_fifo_stop, \
  708. .show = blk_mq_debugfs_rq_show, \
  709. }; \
  710. \
  711. static int deadline_##name##_next_rq_show(void *data, \
  712. struct seq_file *m) \
  713. { \
  714. struct request_queue *q = data; \
  715. struct deadline_data *dd = q->elevator->elevator_data; \
  716. struct dd_per_prio *per_prio = &dd->per_prio[prio]; \
  717. struct request *rq; \
  718. \
  719. rq = deadline_from_pos(per_prio, data_dir, \
  720. per_prio->latest_pos[data_dir]); \
  721. if (rq) \
  722. __blk_mq_debugfs_rq_show(m, rq); \
  723. return 0; \
  724. }
  725. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
  726. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
  727. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
  728. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
  729. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
  730. DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
  731. #undef DEADLINE_DEBUGFS_DDIR_ATTRS
  732. static int deadline_batching_show(void *data, struct seq_file *m)
  733. {
  734. struct request_queue *q = data;
  735. struct deadline_data *dd = q->elevator->elevator_data;
  736. seq_printf(m, "%u\n", dd->batching);
  737. return 0;
  738. }
  739. static int deadline_starved_show(void *data, struct seq_file *m)
  740. {
  741. struct request_queue *q = data;
  742. struct deadline_data *dd = q->elevator->elevator_data;
  743. seq_printf(m, "%u\n", dd->starved);
  744. return 0;
  745. }
  746. static int dd_queued_show(void *data, struct seq_file *m)
  747. {
  748. struct request_queue *q = data;
  749. struct deadline_data *dd = q->elevator->elevator_data;
  750. u32 rt, be, idle;
  751. spin_lock(&dd->lock);
  752. rt = dd_queued(dd, DD_RT_PRIO);
  753. be = dd_queued(dd, DD_BE_PRIO);
  754. idle = dd_queued(dd, DD_IDLE_PRIO);
  755. spin_unlock(&dd->lock);
  756. seq_printf(m, "%u %u %u\n", rt, be, idle);
  757. return 0;
  758. }
  759. /* Number of requests owned by the block driver for a given priority. */
  760. static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
  761. {
  762. const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
  763. lockdep_assert_held(&dd->lock);
  764. return stats->dispatched + stats->merged -
  765. atomic_read(&stats->completed);
  766. }
  767. static int dd_owned_by_driver_show(void *data, struct seq_file *m)
  768. {
  769. struct request_queue *q = data;
  770. struct deadline_data *dd = q->elevator->elevator_data;
  771. u32 rt, be, idle;
  772. spin_lock(&dd->lock);
  773. rt = dd_owned_by_driver(dd, DD_RT_PRIO);
  774. be = dd_owned_by_driver(dd, DD_BE_PRIO);
  775. idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
  776. spin_unlock(&dd->lock);
  777. seq_printf(m, "%u %u %u\n", rt, be, idle);
  778. return 0;
  779. }
  780. static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
  781. __acquires(&dd->lock)
  782. {
  783. struct request_queue *q = m->private;
  784. struct deadline_data *dd = q->elevator->elevator_data;
  785. spin_lock(&dd->lock);
  786. return seq_list_start(&dd->dispatch, *pos);
  787. }
  788. static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
  789. {
  790. struct request_queue *q = m->private;
  791. struct deadline_data *dd = q->elevator->elevator_data;
  792. return seq_list_next(v, &dd->dispatch, pos);
  793. }
  794. static void deadline_dispatch_stop(struct seq_file *m, void *v)
  795. __releases(&dd->lock)
  796. {
  797. struct request_queue *q = m->private;
  798. struct deadline_data *dd = q->elevator->elevator_data;
  799. spin_unlock(&dd->lock);
  800. }
  801. static const struct seq_operations deadline_dispatch_seq_ops = {
  802. .start = deadline_dispatch_start,
  803. .next = deadline_dispatch_next,
  804. .stop = deadline_dispatch_stop,
  805. .show = blk_mq_debugfs_rq_show,
  806. };
  807. #define DEADLINE_QUEUE_DDIR_ATTRS(name) \
  808. {#name "_fifo_list", 0400, \
  809. .seq_ops = &deadline_##name##_fifo_seq_ops}
  810. #define DEADLINE_NEXT_RQ_ATTR(name) \
  811. {#name "_next_rq", 0400, deadline_##name##_next_rq_show}
  812. static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
  813. DEADLINE_QUEUE_DDIR_ATTRS(read0),
  814. DEADLINE_QUEUE_DDIR_ATTRS(write0),
  815. DEADLINE_QUEUE_DDIR_ATTRS(read1),
  816. DEADLINE_QUEUE_DDIR_ATTRS(write1),
  817. DEADLINE_QUEUE_DDIR_ATTRS(read2),
  818. DEADLINE_QUEUE_DDIR_ATTRS(write2),
  819. DEADLINE_NEXT_RQ_ATTR(read0),
  820. DEADLINE_NEXT_RQ_ATTR(write0),
  821. DEADLINE_NEXT_RQ_ATTR(read1),
  822. DEADLINE_NEXT_RQ_ATTR(write1),
  823. DEADLINE_NEXT_RQ_ATTR(read2),
  824. DEADLINE_NEXT_RQ_ATTR(write2),
  825. {"batching", 0400, deadline_batching_show},
  826. {"starved", 0400, deadline_starved_show},
  827. {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
  828. {"owned_by_driver", 0400, dd_owned_by_driver_show},
  829. {"queued", 0400, dd_queued_show},
  830. {},
  831. };
  832. #undef DEADLINE_QUEUE_DDIR_ATTRS
  833. #endif
  834. static struct elevator_type mq_deadline = {
  835. .ops = {
  836. .depth_updated = dd_depth_updated,
  837. .limit_depth = dd_limit_depth,
  838. .insert_requests = dd_insert_requests,
  839. .dispatch_request = dd_dispatch_request,
  840. .prepare_request = dd_prepare_request,
  841. .finish_request = dd_finish_request,
  842. .next_request = elv_rb_latter_request,
  843. .former_request = elv_rb_former_request,
  844. .bio_merge = dd_bio_merge,
  845. .request_merge = dd_request_merge,
  846. .requests_merged = dd_merged_requests,
  847. .request_merged = dd_request_merged,
  848. .has_work = dd_has_work,
  849. .init_sched = dd_init_sched,
  850. .exit_sched = dd_exit_sched,
  851. },
  852. #ifdef CONFIG_BLK_DEBUG_FS
  853. .queue_debugfs_attrs = deadline_queue_debugfs_attrs,
  854. #endif
  855. .elevator_attrs = deadline_attrs,
  856. .elevator_name = "mq-deadline",
  857. .elevator_alias = "deadline",
  858. .elevator_owner = THIS_MODULE,
  859. };
  860. MODULE_ALIAS("mq-deadline-iosched");
  861. static int __init deadline_init(void)
  862. {
  863. return elv_register(&mq_deadline);
  864. }
  865. static void __exit deadline_exit(void)
  866. {
  867. elv_unregister(&mq_deadline);
  868. }
  869. module_init(deadline_init);
  870. module_exit(deadline_exit);
  871. MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
  872. MODULE_LICENSE("GPL");
  873. MODULE_DESCRIPTION("MQ deadline IO scheduler");