blk-mq-sched.c 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * blk-mq scheduling framework
  4. *
  5. * Copyright (C) 2016 Jens Axboe
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/module.h>
  9. #include <linux/list_sort.h>
  10. #include <trace/events/block.h>
  11. #include "blk.h"
  12. #include "blk-mq.h"
  13. #include "blk-mq-debugfs.h"
  14. #include "blk-mq-sched.h"
  15. #include "blk-wbt.h"
  16. /*
  17. * Mark a hardware queue as needing a restart.
  18. */
  19. void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
  20. {
  21. if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  22. return;
  23. set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  24. }
  25. EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
/*
 * Clear BLK_MQ_S_SCHED_RESTART on @hctx and then run the hardware queue via
 * blk_mq_run_hw_queue(hctx, true).
 */
void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	/*
	 * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
	 * in blk_mq_run_hw_queue(). Its pair is the barrier in
	 * blk_mq_dispatch_rq_list(). Without this ordering the dispatch code
	 * may not see SCHED_RESTART while, at the same time, a request newly
	 * added to hctx->dispatch is missed by the check in
	 * blk_mq_run_hw_queue().
	 */
	smp_mb();

	blk_mq_run_hw_queue(hctx, true);
}
  39. static int sched_rq_cmp(void *priv, const struct list_head *a,
  40. const struct list_head *b)
  41. {
  42. struct request *rqa = container_of(a, struct request, queuelist);
  43. struct request *rqb = container_of(b, struct request, queuelist);
  44. return rqa->mq_hctx > rqb->mq_hctx;
  45. }
  46. static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
  47. {
  48. struct blk_mq_hw_ctx *hctx =
  49. list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
  50. struct request *rq;
  51. LIST_HEAD(hctx_list);
  52. list_for_each_entry(rq, rq_list, queuelist) {
  53. if (rq->mq_hctx != hctx) {
  54. list_cut_before(&hctx_list, rq_list, &rq->queuelist);
  55. goto dispatch;
  56. }
  57. }
  58. list_splice_tail_init(rq_list, &hctx_list);
  59. dispatch:
  60. return blk_mq_dispatch_rq_list(hctx, &hctx_list, false);
  61. }
  62. #define BLK_MQ_BUDGET_DELAY 3 /* ms units */
  63. /*
  64. * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
  65. * its queue by itself in its completion handler, so we don't need to
  66. * restart queue if .get_budget() fails to get the budget.
  67. *
  68. * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  69. * be run again. This is necessary to avoid starving flushes.
  70. */
  71. static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  72. {
  73. struct request_queue *q = hctx->queue;
  74. struct elevator_queue *e = q->elevator;
  75. bool multi_hctxs = false, run_queue = false;
  76. bool dispatched = false, busy = false;
  77. unsigned int max_dispatch;
  78. LIST_HEAD(rq_list);
  79. int count = 0;
  80. if (hctx->dispatch_busy)
  81. max_dispatch = 1;
  82. else
  83. max_dispatch = hctx->queue->nr_requests;
  84. do {
  85. struct request *rq;
  86. int budget_token;
  87. if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
  88. break;
  89. if (!list_empty_careful(&hctx->dispatch)) {
  90. busy = true;
  91. break;
  92. }
  93. budget_token = blk_mq_get_dispatch_budget(q);
  94. if (budget_token < 0)
  95. break;
  96. rq = e->type->ops.dispatch_request(hctx);
  97. if (!rq) {
  98. blk_mq_put_dispatch_budget(q, budget_token);
  99. /*
  100. * We're releasing without dispatching. Holding the
  101. * budget could have blocked any "hctx"s with the
  102. * same queue and if we didn't dispatch then there's
  103. * no guarantee anyone will kick the queue. Kick it
  104. * ourselves.
  105. */
  106. run_queue = true;
  107. break;
  108. }
  109. blk_mq_set_rq_budget_token(rq, budget_token);
  110. /*
  111. * Now this rq owns the budget which has to be released
  112. * if this rq won't be queued to driver via .queue_rq()
  113. * in blk_mq_dispatch_rq_list().
  114. */
  115. list_add_tail(&rq->queuelist, &rq_list);
  116. count++;
  117. if (rq->mq_hctx != hctx)
  118. multi_hctxs = true;
  119. /*
  120. * If we cannot get tag for the request, stop dequeueing
  121. * requests from the IO scheduler. We are unlikely to be able
  122. * to submit them anyway and it creates false impression for
  123. * scheduling heuristics that the device can take more IO.
  124. */
  125. if (!blk_mq_get_driver_tag(rq))
  126. break;
  127. } while (count < max_dispatch);
  128. if (!count) {
  129. if (run_queue)
  130. blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
  131. } else if (multi_hctxs) {
  132. /*
  133. * Requests from different hctx may be dequeued from some
  134. * schedulers, such as bfq and deadline.
  135. *
  136. * Sort the requests in the list according to their hctx,
  137. * dispatch batching requests from same hctx at a time.
  138. */
  139. list_sort(NULL, &rq_list, sched_rq_cmp);
  140. do {
  141. dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
  142. } while (!list_empty(&rq_list));
  143. } else {
  144. dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, false);
  145. }
  146. if (busy)
  147. return -EAGAIN;
  148. return !!dispatched;
  149. }
  150. static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  151. {
  152. unsigned long end = jiffies + HZ;
  153. int ret;
  154. do {
  155. ret = __blk_mq_do_dispatch_sched(hctx);
  156. if (ret != 1)
  157. break;
  158. if (need_resched() || time_is_before_jiffies(end)) {
  159. blk_mq_delay_run_hw_queue(hctx, 0);
  160. break;
  161. }
  162. } while (1);
  163. return ret;
  164. }
  165. static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
  166. struct blk_mq_ctx *ctx)
  167. {
  168. unsigned short idx = ctx->index_hw[hctx->type];
  169. if (++idx == hctx->nr_ctx)
  170. idx = 0;
  171. return hctx->ctxs[idx];
  172. }
  173. /*
  174. * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
  175. * its queue by itself in its completion handler, so we don't need to
  176. * restart queue if .get_budget() fails to get the budget.
  177. *
  178. * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  179. * be run again. This is necessary to avoid starving flushes.
  180. */
  181. static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
  182. {
  183. struct request_queue *q = hctx->queue;
  184. LIST_HEAD(rq_list);
  185. struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
  186. int ret = 0;
  187. struct request *rq;
  188. do {
  189. int budget_token;
  190. if (!list_empty_careful(&hctx->dispatch)) {
  191. ret = -EAGAIN;
  192. break;
  193. }
  194. if (!sbitmap_any_bit_set(&hctx->ctx_map))
  195. break;
  196. budget_token = blk_mq_get_dispatch_budget(q);
  197. if (budget_token < 0)
  198. break;
  199. rq = blk_mq_dequeue_from_ctx(hctx, ctx);
  200. if (!rq) {
  201. blk_mq_put_dispatch_budget(q, budget_token);
  202. /*
  203. * We're releasing without dispatching. Holding the
  204. * budget could have blocked any "hctx"s with the
  205. * same queue and if we didn't dispatch then there's
  206. * no guarantee anyone will kick the queue. Kick it
  207. * ourselves.
  208. */
  209. blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
  210. break;
  211. }
  212. blk_mq_set_rq_budget_token(rq, budget_token);
  213. /*
  214. * Now this rq owns the budget which has to be released
  215. * if this rq won't be queued to driver via .queue_rq()
  216. * in blk_mq_dispatch_rq_list().
  217. */
  218. list_add(&rq->queuelist, &rq_list);
  219. /* round robin for fair dispatch */
  220. ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
  221. } while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, false));
  222. WRITE_ONCE(hctx->dispatch_from, ctx);
  223. return ret;
  224. }
  225. static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  226. {
  227. bool need_dispatch = false;
  228. LIST_HEAD(rq_list);
  229. /*
  230. * If we have previous entries on our dispatch list, grab them first for
  231. * more fair dispatch.
  232. */
  233. if (!list_empty_careful(&hctx->dispatch)) {
  234. spin_lock(&hctx->lock);
  235. if (!list_empty(&hctx->dispatch))
  236. list_splice_init(&hctx->dispatch, &rq_list);
  237. spin_unlock(&hctx->lock);
  238. }
  239. /*
  240. * Only ask the scheduler for requests, if we didn't have residual
  241. * requests from the dispatch list. This is to avoid the case where
  242. * we only ever dispatch a fraction of the requests available because
  243. * of low device queue depth. Once we pull requests out of the IO
  244. * scheduler, we can no longer merge or sort them. So it's best to
  245. * leave them there for as long as we can. Mark the hw queue as
  246. * needing a restart in that case.
  247. *
  248. * We want to dispatch from the scheduler if there was nothing
  249. * on the dispatch list or we were able to dispatch from the
  250. * dispatch list.
  251. */
  252. if (!list_empty(&rq_list)) {
  253. blk_mq_sched_mark_restart_hctx(hctx);
  254. if (!blk_mq_dispatch_rq_list(hctx, &rq_list, true))
  255. return 0;
  256. need_dispatch = true;
  257. } else {
  258. need_dispatch = hctx->dispatch_busy;
  259. }
  260. if (hctx->queue->elevator)
  261. return blk_mq_do_dispatch_sched(hctx);
  262. /* dequeue request one by one from sw queue if queue is busy */
  263. if (need_dispatch)
  264. return blk_mq_do_dispatch_ctx(hctx);
  265. blk_mq_flush_busy_ctxs(hctx, &rq_list);
  266. blk_mq_dispatch_rq_list(hctx, &rq_list, true);
  267. return 0;
  268. }
  269. void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  270. {
  271. struct request_queue *q = hctx->queue;
  272. /* RCU or SRCU read lock is needed before checking quiesced flag */
  273. if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
  274. return;
  275. /*
  276. * A return of -EAGAIN is an indication that hctx->dispatch is not
  277. * empty and we must run again in order to avoid starving flushes.
  278. */
  279. if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
  280. if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
  281. blk_mq_run_hw_queue(hctx, true);
  282. }
  283. }
  284. bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
  285. unsigned int nr_segs)
  286. {
  287. struct elevator_queue *e = q->elevator;
  288. struct blk_mq_ctx *ctx;
  289. struct blk_mq_hw_ctx *hctx;
  290. bool ret = false;
  291. enum hctx_type type;
  292. if (e && e->type->ops.bio_merge) {
  293. ret = e->type->ops.bio_merge(q, bio, nr_segs);
  294. goto out_put;
  295. }
  296. ctx = blk_mq_get_ctx(q);
  297. hctx = blk_mq_map_queue(bio->bi_opf, ctx);
  298. type = hctx->type;
  299. if (list_empty_careful(&ctx->rq_lists[type]))
  300. goto out_put;
  301. /* default per sw-queue merge */
  302. spin_lock(&ctx->lock);
  303. /*
  304. * Reverse check our software queue for entries that we could
  305. * potentially merge with. Currently includes a hand-wavy stop
  306. * count of 8, to not spend too much time checking for merges.
  307. */
  308. if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
  309. ret = true;
  310. spin_unlock(&ctx->lock);
  311. out_put:
  312. return ret;
  313. }
  314. bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
  315. struct list_head *free)
  316. {
  317. return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
  318. }
  319. EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
  320. /* called in queue's release handler, tagset has gone away */
  321. static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
  322. {
  323. struct blk_mq_hw_ctx *hctx;
  324. unsigned long i;
  325. queue_for_each_hw_ctx(q, hctx, i)
  326. hctx->sched_tags = NULL;
  327. if (blk_mq_is_shared_tags(flags))
  328. q->sched_shared_tags = NULL;
  329. }
  330. void blk_mq_sched_reg_debugfs(struct request_queue *q)
  331. {
  332. struct blk_mq_hw_ctx *hctx;
  333. unsigned int memflags;
  334. unsigned long i;
  335. memflags = blk_debugfs_lock(q);
  336. blk_mq_debugfs_register_sched(q);
  337. queue_for_each_hw_ctx(q, hctx, i)
  338. blk_mq_debugfs_register_sched_hctx(q, hctx);
  339. blk_debugfs_unlock(q, memflags);
  340. }
  341. void blk_mq_sched_unreg_debugfs(struct request_queue *q)
  342. {
  343. struct blk_mq_hw_ctx *hctx;
  344. unsigned long i;
  345. blk_debugfs_lock_nomemsave(q);
  346. queue_for_each_hw_ctx(q, hctx, i)
  347. blk_mq_debugfs_unregister_sched_hctx(hctx);
  348. blk_mq_debugfs_unregister_sched(q);
  349. blk_debugfs_unlock_nomemrestore(q);
  350. }
  351. void blk_mq_free_sched_tags(struct elevator_tags *et,
  352. struct blk_mq_tag_set *set)
  353. {
  354. unsigned long i;
  355. /* Shared tags are stored at index 0 in @tags. */
  356. if (blk_mq_is_shared_tags(set->flags))
  357. blk_mq_free_map_and_rqs(set, et->tags[0], BLK_MQ_NO_HCTX_IDX);
  358. else {
  359. for (i = 0; i < et->nr_hw_queues; i++)
  360. blk_mq_free_map_and_rqs(set, et->tags[i], i);
  361. }
  362. kfree(et);
  363. }
  364. void blk_mq_free_sched_res(struct elevator_resources *res,
  365. struct elevator_type *type,
  366. struct blk_mq_tag_set *set)
  367. {
  368. if (res->et) {
  369. blk_mq_free_sched_tags(res->et, set);
  370. res->et = NULL;
  371. }
  372. if (res->data) {
  373. blk_mq_free_sched_data(type, res->data);
  374. res->data = NULL;
  375. }
  376. }
  377. void blk_mq_free_sched_res_batch(struct xarray *elv_tbl,
  378. struct blk_mq_tag_set *set)
  379. {
  380. struct request_queue *q;
  381. struct elv_change_ctx *ctx;
  382. lockdep_assert_held_write(&set->update_nr_hwq_lock);
  383. list_for_each_entry(q, &set->tag_list, tag_set_list) {
  384. /*
  385. * Accessing q->elevator without holding q->elevator_lock is
  386. * safe because we're holding here set->update_nr_hwq_lock in
  387. * the writer context. So, scheduler update/switch code (which
  388. * acquires the same lock but in the reader context) can't run
  389. * concurrently.
  390. */
  391. if (q->elevator) {
  392. ctx = xa_load(elv_tbl, q->id);
  393. if (!ctx) {
  394. WARN_ON_ONCE(1);
  395. continue;
  396. }
  397. blk_mq_free_sched_res(&ctx->res, ctx->type, set);
  398. }
  399. }
  400. }
  401. void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl)
  402. {
  403. unsigned long i;
  404. struct elv_change_ctx *ctx;
  405. xa_for_each(elv_tbl, i, ctx) {
  406. xa_erase(elv_tbl, i);
  407. kfree(ctx);
  408. }
  409. }
  410. int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
  411. struct blk_mq_tag_set *set)
  412. {
  413. struct request_queue *q;
  414. struct elv_change_ctx *ctx;
  415. lockdep_assert_held_write(&set->update_nr_hwq_lock);
  416. list_for_each_entry(q, &set->tag_list, tag_set_list) {
  417. ctx = kzalloc_obj(struct elv_change_ctx);
  418. if (!ctx)
  419. return -ENOMEM;
  420. if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) {
  421. kfree(ctx);
  422. return -ENOMEM;
  423. }
  424. }
  425. return 0;
  426. }
  427. struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
  428. unsigned int nr_hw_queues, unsigned int nr_requests)
  429. {
  430. unsigned int nr_tags;
  431. int i;
  432. struct elevator_tags *et;
  433. gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
  434. if (blk_mq_is_shared_tags(set->flags))
  435. nr_tags = 1;
  436. else
  437. nr_tags = nr_hw_queues;
  438. et = kmalloc_flex(*et, tags, nr_tags, gfp);
  439. if (!et)
  440. return NULL;
  441. et->nr_requests = nr_requests;
  442. et->nr_hw_queues = nr_hw_queues;
  443. if (blk_mq_is_shared_tags(set->flags)) {
  444. /* Shared tags are stored at index 0 in @tags. */
  445. et->tags[0] = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX,
  446. MAX_SCHED_RQ);
  447. if (!et->tags[0])
  448. goto out;
  449. } else {
  450. for (i = 0; i < et->nr_hw_queues; i++) {
  451. et->tags[i] = blk_mq_alloc_map_and_rqs(set, i,
  452. et->nr_requests);
  453. if (!et->tags[i])
  454. goto out_unwind;
  455. }
  456. }
  457. return et;
  458. out_unwind:
  459. while (--i >= 0)
  460. blk_mq_free_map_and_rqs(set, et->tags[i], i);
  461. out:
  462. kfree(et);
  463. return NULL;
  464. }
  465. int blk_mq_alloc_sched_res(struct request_queue *q,
  466. struct elevator_type *type,
  467. struct elevator_resources *res,
  468. unsigned int nr_hw_queues)
  469. {
  470. struct blk_mq_tag_set *set = q->tag_set;
  471. res->et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
  472. blk_mq_default_nr_requests(set));
  473. if (!res->et)
  474. return -ENOMEM;
  475. res->data = blk_mq_alloc_sched_data(q, type);
  476. if (IS_ERR(res->data)) {
  477. blk_mq_free_sched_tags(res->et, set);
  478. return -ENOMEM;
  479. }
  480. return 0;
  481. }
  482. int blk_mq_alloc_sched_res_batch(struct xarray *elv_tbl,
  483. struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
  484. {
  485. struct elv_change_ctx *ctx;
  486. struct request_queue *q;
  487. int ret = -ENOMEM;
  488. lockdep_assert_held_write(&set->update_nr_hwq_lock);
  489. list_for_each_entry(q, &set->tag_list, tag_set_list) {
  490. /*
  491. * Accessing q->elevator without holding q->elevator_lock is
  492. * safe because we're holding here set->update_nr_hwq_lock in
  493. * the writer context. So, scheduler update/switch code (which
  494. * acquires the same lock but in the reader context) can't run
  495. * concurrently.
  496. */
  497. if (q->elevator) {
  498. ctx = xa_load(elv_tbl, q->id);
  499. if (WARN_ON_ONCE(!ctx)) {
  500. ret = -ENOENT;
  501. goto out_unwind;
  502. }
  503. ret = blk_mq_alloc_sched_res(q, q->elevator->type,
  504. &ctx->res, nr_hw_queues);
  505. if (ret)
  506. goto out_unwind;
  507. }
  508. }
  509. return 0;
  510. out_unwind:
  511. list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
  512. if (q->elevator) {
  513. ctx = xa_load(elv_tbl, q->id);
  514. if (ctx)
  515. blk_mq_free_sched_res(&ctx->res,
  516. ctx->type, set);
  517. }
  518. }
  519. return ret;
  520. }
/*
 * Set up elevator type @e on queue @q using the pre-allocated resources in
 * @res.
 *
 * caller must have a reference to @e, will grab another one if successful
 *
 * Returns 0 on success, -ENOMEM if the elevator queue cannot be allocated,
 * or the error returned by the elevator's init_sched()/init_hctx() hook.
 */
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
		struct elevator_resources *res)
{
	unsigned int flags = q->tag_set->flags;
	struct elevator_tags *et = res->et;
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned long i;
	int ret;

	eq = elevator_alloc(q, e, res);
	if (!eq)
		return -ENOMEM;

	q->nr_requests = et->nr_requests;

	if (blk_mq_is_shared_tags(flags)) {
		/* Shared tags are stored at index 0 in @et->tags. */
		q->sched_shared_tags = et->tags[0];
		blk_mq_tag_update_sched_shared_tags(q, et->nr_requests);
	}

	/* Point every hardware queue at its (shared or per-hctx) sched tags. */
	queue_for_each_hw_ctx(q, hctx, i) {
		if (blk_mq_is_shared_tags(flags))
			hctx->sched_tags = q->sched_shared_tags;
		else
			hctx->sched_tags = et->tags[i];
	}

	ret = e->ops.init_sched(q, eq);
	if (ret)
		goto out;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.init_hctx) {
			ret = e->ops.init_hctx(hctx, i);
			if (ret) {
				/*
				 * init_sched() already succeeded, so undo it
				 * through the regular exit path before the
				 * reference on eq is dropped.
				 */
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
	}
	return 0;

out:
	/* init_sched() failed: clear the tag pointers set up above. */
	blk_mq_sched_tags_teardown(q, flags);
	kobject_put(&eq->kobj);
	q->elevator = NULL;
	return ret;
}
  566. /*
  567. * called in either blk_queue_cleanup or elevator_switch, tagset
  568. * is required for freeing requests
  569. */
  570. void blk_mq_sched_free_rqs(struct request_queue *q)
  571. {
  572. struct blk_mq_hw_ctx *hctx;
  573. unsigned long i;
  574. if (blk_mq_is_shared_tags(q->tag_set->flags)) {
  575. blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
  576. BLK_MQ_NO_HCTX_IDX);
  577. } else {
  578. queue_for_each_hw_ctx(q, hctx, i) {
  579. if (hctx->sched_tags)
  580. blk_mq_free_rqs(q->tag_set,
  581. hctx->sched_tags, i);
  582. }
  583. }
  584. }
/*
 * Tear down elevator @e on queue @q: run the per-hctx exit hook where
 * scheduler data is present, run the elevator's exit_sched hook, clear the
 * scheduler tag pointers, mark the elevator as dying and detach it from the
 * queue.
 */
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;
	unsigned int flags = 0;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->type->ops.exit_hctx && hctx->sched_data) {
			e->type->ops.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
		/* Ends up holding the flags of the last hctx visited. */
		flags = hctx->flags;
	}

	if (e->type->ops.exit_sched)
		e->type->ops.exit_sched(e);
	blk_mq_sched_tags_teardown(q, flags);
	/*
	 * NOTE(review): this dereferences q->elevator rather than @e, so it
	 * relies on q->elevator being set (presumably to @e) at this point.
	 * Confirm against the callers.
	 */
	set_bit(ELEVATOR_FLAG_DYING, &q->elevator->flags);
	q->elevator = NULL;
}
  602. }