elevator.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Block device elevator/IO-scheduler.
  4. *
  5. * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  6. *
  7. * 30042000 Jens Axboe <axboe@kernel.dk> :
  8. *
  9. * Split the elevator a bit so that it is possible to choose a different
  10. * one or even write a new "plug in". There are three pieces:
  11. * - elevator_fn, inserts a new request in the queue list
  12. * - elevator_merge_fn, decides whether a new buffer can be merged with
  13. * an existing request
  14. * - elevator_dequeue_fn, called when a request is taken off the active list
  15. *
  16. * 20082000 Dave Jones <davej@suse.de> :
  17. * Removed tests for max-bomb-segments, which was breaking elvtune
  18. * when run without -bN
  19. *
  20. * Jens:
  21. * - Rework again to work with bio instead of buffer_heads
  22. * - lose bi_dev comparisons, partition handling is right now
  23. * - completely modularize elevator setup and teardown
  24. *
  25. */
  26. #include <linux/kernel.h>
  27. #include <linux/fs.h>
  28. #include <linux/blkdev.h>
  29. #include <linux/bio.h>
  30. #include <linux/module.h>
  31. #include <linux/slab.h>
  32. #include <linux/init.h>
  33. #include <linux/compiler.h>
  34. #include <linux/blktrace_api.h>
  35. #include <linux/hash.h>
  36. #include <linux/uaccess.h>
  37. #include <linux/pm_runtime.h>
  38. #include <trace/events/block.h>
  39. #include "elevator.h"
  40. #include "blk.h"
  41. #include "blk-mq-sched.h"
  42. #include "blk-pm.h"
  43. #include "blk-wbt.h"
  44. #include "blk-cgroup.h"
  /*
   * elv_list holds every I/O scheduler type added by elv_register().
   * elv_list_lock is taken around every walk and modification of the list
   * in this file.
   */
  static LIST_HEAD(elv_list);
  static DEFINE_SPINLOCK(elv_list_lock);
  /*
   * Merge hash support.
   *
   * A request is hashed by its start position plus its sector count; the
   * lookup side (elv_rqhash_find()) is handed the sector a merge candidate
   * would have to end at.
   */
  #define rq_hash_key(req) (blk_rq_pos(req) + blk_rq_sectors(req))
  51. /*
  52. * Query io scheduler to see if the current process issuing bio may be
  53. * merged with rq.
  54. */
  55. static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
  56. {
  57. struct request_queue *q = rq->q;
  58. struct elevator_queue *e = q->elevator;
  59. if (e->type->ops.allow_merge)
  60. return e->type->ops.allow_merge(q, rq, bio);
  61. return true;
  62. }
  /*
   * Can @bio safely be merged into @rq?  The generic blk_rq_merge_ok() check
   * runs first; the I/O scheduler is only consulted when that one passes.
   */
  bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
  {
      return blk_rq_merge_ok(rq, bio) &&
             elv_iosched_allow_bio_merge(rq, bio);
  }
  EXPORT_SYMBOL(elv_bio_merge_ok);
  75. /**
  76. * elevator_match - Check whether @e's name or alias matches @name
  77. * @e: Scheduler to test
  78. * @name: Elevator name to test
  79. *
  80. * Return true if the elevator @e's name or alias matches @name.
  81. */
  82. static bool elevator_match(const struct elevator_type *e, const char *name)
  83. {
  84. return !strcmp(e->elevator_name, name) ||
  85. (e->elevator_alias && !strcmp(e->elevator_alias, name));
  86. }
  87. static struct elevator_type *__elevator_find(const char *name)
  88. {
  89. struct elevator_type *e;
  90. list_for_each_entry(e, &elv_list, list)
  91. if (elevator_match(e, name))
  92. return e;
  93. return NULL;
  94. }
  95. static struct elevator_type *elevator_find_get(const char *name)
  96. {
  97. struct elevator_type *e;
  98. spin_lock(&elv_list_lock);
  99. e = __elevator_find(name);
  100. if (e && (!elevator_tryget(e)))
  101. e = NULL;
  102. spin_unlock(&elv_list_lock);
  103. return e;
  104. }
  105. static const struct kobj_type elv_ktype;
  106. struct elevator_queue *elevator_alloc(struct request_queue *q,
  107. struct elevator_type *e, struct elevator_resources *res)
  108. {
  109. struct elevator_queue *eq;
  110. eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
  111. if (unlikely(!eq))
  112. return NULL;
  113. __elevator_get(e);
  114. eq->type = e;
  115. kobject_init(&eq->kobj, &elv_ktype);
  116. mutex_init(&eq->sysfs_lock);
  117. hash_init(eq->hash);
  118. eq->et = res->et;
  119. eq->elevator_data = res->data;
  120. return eq;
  121. }
  122. static void elevator_release(struct kobject *kobj)
  123. {
  124. struct elevator_queue *e;
  125. e = container_of(kobj, struct elevator_queue, kobj);
  126. elevator_put(e->type);
  127. kfree(e);
  128. }
  129. static void elevator_exit(struct request_queue *q)
  130. {
  131. struct elevator_queue *e = q->elevator;
  132. lockdep_assert_held(&q->elevator_lock);
  133. ioc_clear_queue(q);
  134. mutex_lock(&e->sysfs_lock);
  135. blk_mq_exit_sched(q, e);
  136. mutex_unlock(&e->sysfs_lock);
  137. }
  138. static inline void __elv_rqhash_del(struct request *rq)
  139. {
  140. hash_del(&rq->hash);
  141. rq->rq_flags &= ~RQF_HASHED;
  142. }
  /*
   * Remove @rq from the merge hash if it is currently on it; otherwise do
   * nothing.  @q is not used.
   */
  void elv_rqhash_del(struct request_queue *q, struct request *rq)
  {
      if (!ELV_ON_HASH(rq))
          return;

      __elv_rqhash_del(rq);
  }
  EXPORT_SYMBOL_GPL(elv_rqhash_del);
  149. void elv_rqhash_add(struct request_queue *q, struct request *rq)
  150. {
  151. struct elevator_queue *e = q->elevator;
  152. BUG_ON(ELV_ON_HASH(rq));
  153. hash_add(e->hash, &rq->hash, rq_hash_key(rq));
  154. rq->rq_flags |= RQF_HASHED;
  155. }
  156. EXPORT_SYMBOL_GPL(elv_rqhash_add);
  /*
   * Re-hash @rq under its current rq_hash_key(): unlink it unconditionally,
   * then add it back.
   */
  void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
  {
      __elv_rqhash_del(rq);

      elv_rqhash_add(q, rq);
  }
  162. struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
  163. {
  164. struct elevator_queue *e = q->elevator;
  165. struct hlist_node *next;
  166. struct request *rq;
  167. hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
  168. BUG_ON(!ELV_ON_HASH(rq));
  169. if (unlikely(!rq_mergeable(rq))) {
  170. __elv_rqhash_del(rq);
  171. continue;
  172. }
  173. if (rq_hash_key(rq) == offset)
  174. return rq;
  175. }
  176. return NULL;
  177. }
  178. /*
  179. * RB-tree support functions for inserting/lookup/removal of requests
  180. * in a sorted RB tree.
  181. */
  182. void elv_rb_add(struct rb_root *root, struct request *rq)
  183. {
  184. struct rb_node **p = &root->rb_node;
  185. struct rb_node *parent = NULL;
  186. struct request *__rq;
  187. while (*p) {
  188. parent = *p;
  189. __rq = rb_entry(parent, struct request, rb_node);
  190. if (blk_rq_pos(rq) < blk_rq_pos(__rq))
  191. p = &(*p)->rb_left;
  192. else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
  193. p = &(*p)->rb_right;
  194. }
  195. rb_link_node(&rq->rb_node, parent, p);
  196. rb_insert_color(&rq->rb_node, root);
  197. }
  198. EXPORT_SYMBOL(elv_rb_add);
  199. void elv_rb_del(struct rb_root *root, struct request *rq)
  200. {
  201. BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
  202. rb_erase(&rq->rb_node, root);
  203. RB_CLEAR_NODE(&rq->rb_node);
  204. }
  205. EXPORT_SYMBOL(elv_rb_del);
  206. struct request *elv_rb_find(struct rb_root *root, sector_t sector)
  207. {
  208. struct rb_node *n = root->rb_node;
  209. struct request *rq;
  210. while (n) {
  211. rq = rb_entry(n, struct request, rb_node);
  212. if (sector < blk_rq_pos(rq))
  213. n = n->rb_left;
  214. else if (sector > blk_rq_pos(rq))
  215. n = n->rb_right;
  216. else
  217. return rq;
  218. }
  219. return NULL;
  220. }
  221. EXPORT_SYMBOL(elv_rb_find);
  222. enum elv_merge elv_merge(struct request_queue *q, struct request **req,
  223. struct bio *bio)
  224. {
  225. struct elevator_queue *e = q->elevator;
  226. struct request *__rq;
  227. /*
  228. * Levels of merges:
  229. * nomerges: No merges at all attempted
  230. * noxmerges: Only simple one-hit cache try
  231. * merges: All merge tries attempted
  232. */
  233. if (blk_queue_nomerges(q) || !bio_mergeable(bio))
  234. return ELEVATOR_NO_MERGE;
  235. /*
  236. * First try one-hit cache.
  237. */
  238. if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
  239. enum elv_merge ret = blk_try_merge(q->last_merge, bio);
  240. if (ret != ELEVATOR_NO_MERGE) {
  241. *req = q->last_merge;
  242. return ret;
  243. }
  244. }
  245. if (blk_queue_noxmerges(q))
  246. return ELEVATOR_NO_MERGE;
  247. /*
  248. * See if our hash lookup can find a potential backmerge.
  249. */
  250. __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
  251. if (__rq && elv_bio_merge_ok(__rq, bio)) {
  252. *req = __rq;
  253. if (blk_discard_mergable(__rq))
  254. return ELEVATOR_DISCARD_MERGE;
  255. return ELEVATOR_BACK_MERGE;
  256. }
  257. if (e->type->ops.request_merge)
  258. return e->type->ops.request_merge(q, req, bio);
  259. return ELEVATOR_NO_MERGE;
  260. }
  261. /*
  262. * Attempt to do an insertion back merge. Only check for the case where
  263. * we can append 'rq' to an existing request, so we can throw 'rq' away
  264. * afterwards.
  265. *
  266. * Returns true if we merged, false otherwise. 'free' will contain all
  267. * requests that need to be freed.
  268. */
  269. bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
  270. struct list_head *free)
  271. {
  272. struct request *__rq;
  273. bool ret;
  274. if (blk_queue_nomerges(q))
  275. return false;
  276. /*
  277. * First try one-hit cache.
  278. */
  279. if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
  280. list_add(&rq->queuelist, free);
  281. return true;
  282. }
  283. if (blk_queue_noxmerges(q))
  284. return false;
  285. ret = false;
  286. /*
  287. * See if our hash lookup can find a potential backmerge.
  288. */
  289. while (1) {
  290. __rq = elv_rqhash_find(q, blk_rq_pos(rq));
  291. if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
  292. break;
  293. list_add(&rq->queuelist, free);
  294. /* The merged request could be merged with others, try again */
  295. ret = true;
  296. rq = __rq;
  297. }
  298. return ret;
  299. }
  300. void elv_merged_request(struct request_queue *q, struct request *rq,
  301. enum elv_merge type)
  302. {
  303. struct elevator_queue *e = q->elevator;
  304. if (e->type->ops.request_merged)
  305. e->type->ops.request_merged(q, rq, type);
  306. if (type == ELEVATOR_BACK_MERGE)
  307. elv_rqhash_reposition(q, rq);
  308. q->last_merge = rq;
  309. }
  310. void elv_merge_requests(struct request_queue *q, struct request *rq,
  311. struct request *next)
  312. {
  313. struct elevator_queue *e = q->elevator;
  314. if (e->type->ops.requests_merged)
  315. e->type->ops.requests_merged(q, rq, next);
  316. elv_rqhash_reposition(q, rq);
  317. q->last_merge = rq;
  318. }
  319. struct request *elv_latter_request(struct request_queue *q, struct request *rq)
  320. {
  321. struct elevator_queue *e = q->elevator;
  322. if (e->type->ops.next_request)
  323. return e->type->ops.next_request(q, rq);
  324. return NULL;
  325. }
  326. struct request *elv_former_request(struct request_queue *q, struct request *rq)
  327. {
  328. struct elevator_queue *e = q->elevator;
  329. if (e->type->ops.former_request)
  330. return e->type->ops.former_request(q, rq);
  331. return NULL;
  332. }
  333. #define to_elv(atr) container_of_const((atr), struct elv_fs_entry, attr)
  334. static ssize_t
  335. elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
  336. {
  337. const struct elv_fs_entry *entry = to_elv(attr);
  338. struct elevator_queue *e;
  339. ssize_t error = -ENODEV;
  340. if (!entry->show)
  341. return -EIO;
  342. e = container_of(kobj, struct elevator_queue, kobj);
  343. mutex_lock(&e->sysfs_lock);
  344. if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
  345. error = entry->show(e, page);
  346. mutex_unlock(&e->sysfs_lock);
  347. return error;
  348. }
  349. static ssize_t
  350. elv_attr_store(struct kobject *kobj, struct attribute *attr,
  351. const char *page, size_t length)
  352. {
  353. const struct elv_fs_entry *entry = to_elv(attr);
  354. struct elevator_queue *e;
  355. ssize_t error = -ENODEV;
  356. if (!entry->store)
  357. return -EIO;
  358. e = container_of(kobj, struct elevator_queue, kobj);
  359. mutex_lock(&e->sysfs_lock);
  360. if (!test_bit(ELEVATOR_FLAG_DYING, &e->flags))
  361. error = entry->store(e, page, length);
  362. mutex_unlock(&e->sysfs_lock);
  363. return error;
  364. }
  365. static const struct sysfs_ops elv_sysfs_ops = {
  366. .show = elv_attr_show,
  367. .store = elv_attr_store,
  368. };
  369. static const struct kobj_type elv_ktype = {
  370. .sysfs_ops = &elv_sysfs_ops,
  371. .release = elevator_release,
  372. };
  373. static int elv_register_queue(struct request_queue *q,
  374. struct elevator_queue *e,
  375. bool uevent)
  376. {
  377. int error;
  378. error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
  379. if (!error) {
  380. const struct elv_fs_entry *attr = e->type->elevator_attrs;
  381. if (attr) {
  382. while (attr->attr.name) {
  383. if (sysfs_create_file(&e->kobj, &attr->attr))
  384. break;
  385. attr++;
  386. }
  387. }
  388. if (uevent)
  389. kobject_uevent(&e->kobj, KOBJ_ADD);
  390. /*
  391. * Sched is initialized, it is ready to export it via
  392. * debugfs
  393. */
  394. blk_mq_sched_reg_debugfs(q);
  395. set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
  396. }
  397. return error;
  398. }
  399. static void elv_unregister_queue(struct request_queue *q,
  400. struct elevator_queue *e)
  401. {
  402. if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
  403. kobject_uevent(&e->kobj, KOBJ_REMOVE);
  404. kobject_del(&e->kobj);
  405. /* unexport via debugfs before exiting sched */
  406. blk_mq_sched_unreg_debugfs(q);
  407. }
  408. }
  409. int elv_register(struct elevator_type *e)
  410. {
  411. /* finish request is mandatory */
  412. if (WARN_ON_ONCE(!e->ops.finish_request))
  413. return -EINVAL;
  414. /* insert_requests and dispatch_request are mandatory */
  415. if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
  416. return -EINVAL;
  417. /* create icq_cache if requested */
  418. if (e->icq_size) {
  419. if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
  420. WARN_ON(e->icq_align < __alignof__(struct io_cq)))
  421. return -EINVAL;
  422. snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
  423. "%s_io_cq", e->elevator_name);
  424. e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
  425. e->icq_align, 0, NULL);
  426. if (!e->icq_cache)
  427. return -ENOMEM;
  428. }
  429. /* register, don't allow duplicate names */
  430. spin_lock(&elv_list_lock);
  431. if (__elevator_find(e->elevator_name)) {
  432. spin_unlock(&elv_list_lock);
  433. kmem_cache_destroy(e->icq_cache);
  434. return -EBUSY;
  435. }
  436. list_add_tail(&e->list, &elv_list);
  437. spin_unlock(&elv_list_lock);
  438. printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name);
  439. return 0;
  440. }
  441. EXPORT_SYMBOL_GPL(elv_register);
  442. void elv_unregister(struct elevator_type *e)
  443. {
  444. /* unregister */
  445. spin_lock(&elv_list_lock);
  446. list_del_init(&e->list);
  447. spin_unlock(&elv_list_lock);
  448. /*
  449. * Destroy icq_cache if it exists. icq's are RCU managed. Make
  450. * sure all RCU operations are complete before proceeding.
  451. */
  452. if (e->icq_cache) {
  453. rcu_barrier();
  454. kmem_cache_destroy(e->icq_cache);
  455. e->icq_cache = NULL;
  456. }
  457. }
  458. EXPORT_SYMBOL_GPL(elv_unregister);
  459. /*
  460. * Switch to new_e io scheduler.
  461. *
  462. * If switching fails, we are most likely running out of memory and not able
  463. * to restore the old io scheduler, so leaving the io scheduler being none.
  464. */
  465. static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
  466. {
  467. struct elevator_type *new_e = NULL;
  468. int ret = 0;
  469. WARN_ON_ONCE(q->mq_freeze_depth == 0);
  470. lockdep_assert_held(&q->elevator_lock);
  471. if (strncmp(ctx->name, "none", 4)) {
  472. new_e = elevator_find_get(ctx->name);
  473. if (!new_e)
  474. return -EINVAL;
  475. }
  476. blk_mq_quiesce_queue(q);
  477. if (q->elevator) {
  478. ctx->old = q->elevator;
  479. elevator_exit(q);
  480. }
  481. if (new_e) {
  482. ret = blk_mq_init_sched(q, new_e, &ctx->res);
  483. if (ret)
  484. goto out_unfreeze;
  485. ctx->new = q->elevator;
  486. } else {
  487. blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
  488. q->elevator = NULL;
  489. q->nr_requests = q->tag_set->queue_depth;
  490. q->async_depth = q->tag_set->queue_depth;
  491. }
  492. blk_add_trace_msg(q, "elv switch: %s", ctx->name);
  493. out_unfreeze:
  494. blk_mq_unquiesce_queue(q);
  495. if (ret) {
  496. pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n",
  497. new_e->elevator_name);
  498. }
  499. if (new_e)
  500. elevator_put(new_e);
  501. return ret;
  502. }
  503. static void elv_exit_and_release(struct elv_change_ctx *ctx,
  504. struct request_queue *q)
  505. {
  506. struct elevator_queue *e;
  507. unsigned memflags;
  508. memflags = blk_mq_freeze_queue(q);
  509. mutex_lock(&q->elevator_lock);
  510. e = q->elevator;
  511. elevator_exit(q);
  512. mutex_unlock(&q->elevator_lock);
  513. blk_mq_unfreeze_queue(q, memflags);
  514. if (e) {
  515. blk_mq_free_sched_res(&ctx->res, ctx->type, q->tag_set);
  516. kobject_put(&e->kobj);
  517. }
  518. }
  519. static int elevator_change_done(struct request_queue *q,
  520. struct elv_change_ctx *ctx)
  521. {
  522. int ret = 0;
  523. if (ctx->old) {
  524. struct elevator_resources res = {
  525. .et = ctx->old->et,
  526. .data = ctx->old->elevator_data
  527. };
  528. elv_unregister_queue(q, ctx->old);
  529. blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
  530. kobject_put(&ctx->old->kobj);
  531. }
  532. if (ctx->new) {
  533. ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
  534. if (ret)
  535. elv_exit_and_release(ctx, q);
  536. }
  537. return ret;
  538. }
  539. /*
  540. * Switch this queue to the given IO scheduler.
  541. */
  542. static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
  543. {
  544. unsigned int memflags;
  545. struct blk_mq_tag_set *set = q->tag_set;
  546. int ret = 0;
  547. lockdep_assert_held(&set->update_nr_hwq_lock);
  548. if (strncmp(ctx->name, "none", 4)) {
  549. ret = blk_mq_alloc_sched_res(q, ctx->type, &ctx->res,
  550. set->nr_hw_queues);
  551. if (ret)
  552. return ret;
  553. }
  554. memflags = blk_mq_freeze_queue(q);
  555. /*
  556. * May be called before adding disk, when there isn't any FS I/O,
  557. * so freezing queue plus canceling dispatch work is enough to
  558. * drain any dispatch activities originated from passthrough
  559. * requests, then no need to quiesce queue which may add long boot
  560. * latency, especially when lots of disks are involved.
  561. *
  562. * Disk isn't added yet, so verifying queue lock only manually.
  563. */
  564. blk_mq_cancel_work_sync(q);
  565. mutex_lock(&q->elevator_lock);
  566. if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
  567. ret = elevator_switch(q, ctx);
  568. mutex_unlock(&q->elevator_lock);
  569. blk_mq_unfreeze_queue(q, memflags);
  570. if (!ret)
  571. ret = elevator_change_done(q, ctx);
  572. /*
  573. * Free sched resource if it's allocated but we couldn't switch elevator.
  574. */
  575. if (!ctx->new)
  576. blk_mq_free_sched_res(&ctx->res, ctx->type, set);
  577. return ret;
  578. }
  579. /*
  580. * The I/O scheduler depends on the number of hardware queues, this forces a
  581. * reattachment when nr_hw_queues changes.
  582. */
  583. void elv_update_nr_hw_queues(struct request_queue *q,
  584. struct elv_change_ctx *ctx)
  585. {
  586. struct blk_mq_tag_set *set = q->tag_set;
  587. int ret = -ENODEV;
  588. WARN_ON_ONCE(q->mq_freeze_depth == 0);
  589. if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
  590. mutex_lock(&q->elevator_lock);
  591. /* force to reattach elevator after nr_hw_queue is updated */
  592. ret = elevator_switch(q, ctx);
  593. mutex_unlock(&q->elevator_lock);
  594. }
  595. blk_mq_unfreeze_queue_nomemrestore(q);
  596. if (!ret)
  597. WARN_ON_ONCE(elevator_change_done(q, ctx));
  598. /*
  599. * Free sched resource if it's allocated but we couldn't switch elevator.
  600. */
  601. if (!ctx->new)
  602. blk_mq_free_sched_res(&ctx->res, ctx->type, set);
  603. }
  604. /*
  605. * Use the default elevator settings. If the chosen elevator initialization
  606. * fails, fall back to the "none" elevator (no elevator).
  607. */
  608. void elevator_set_default(struct request_queue *q)
  609. {
  610. struct elv_change_ctx ctx = {
  611. .name = "mq-deadline",
  612. .no_uevent = true,
  613. };
  614. int err;
  615. /* now we allow to switch elevator */
  616. blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
  617. if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
  618. return;
  619. /*
  620. * For single queue devices, default to using mq-deadline. If we
  621. * have multiple queues or mq-deadline is not available, default
  622. * to "none".
  623. */
  624. ctx.type = elevator_find_get(ctx.name);
  625. if (!ctx.type)
  626. return;
  627. if ((q->nr_hw_queues == 1 ||
  628. blk_mq_is_shared_tags(q->tag_set->flags))) {
  629. err = elevator_change(q, &ctx);
  630. if (err < 0)
  631. pr_warn("\"%s\" elevator initialization, failed %d, falling back to \"none\"\n",
  632. ctx.name, err);
  633. }
  634. elevator_put(ctx.type);
  635. }
  636. void elevator_set_none(struct request_queue *q)
  637. {
  638. struct elv_change_ctx ctx = {
  639. .name = "none",
  640. };
  641. int err;
  642. err = elevator_change(q, &ctx);
  643. if (err < 0)
  644. pr_warn("%s: set none elevator failed %d\n", __func__, err);
  645. }
  646. static void elv_iosched_load_module(const char *elevator_name)
  647. {
  648. struct elevator_type *found;
  649. spin_lock(&elv_list_lock);
  650. found = __elevator_find(elevator_name);
  651. spin_unlock(&elv_list_lock);
  652. if (!found)
  653. request_module("%s-iosched", elevator_name);
  654. }
  655. ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
  656. size_t count)
  657. {
  658. char elevator_name[ELV_NAME_MAX];
  659. struct elv_change_ctx ctx = {};
  660. int ret;
  661. struct request_queue *q = disk->queue;
  662. struct blk_mq_tag_set *set = q->tag_set;
  663. /* Make sure queue is not in the middle of being removed */
  664. if (!blk_queue_registered(q))
  665. return -ENOENT;
  666. /*
  667. * If the attribute needs to load a module, do it before freezing the
  668. * queue to ensure that the module file can be read when the request
  669. * queue is the one for the device storing the module file.
  670. */
  671. strscpy(elevator_name, buf, sizeof(elevator_name));
  672. ctx.name = strstrip(elevator_name);
  673. elv_iosched_load_module(ctx.name);
  674. ctx.type = elevator_find_get(ctx.name);
  675. /*
  676. * Use trylock to avoid circular lock dependency with kernfs active
  677. * reference during concurrent disk deletion:
  678. * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
  679. * kn->active -> update_nr_hwq_lock (via this sysfs write path)
  680. */
  681. if (!down_read_trylock(&set->update_nr_hwq_lock)) {
  682. ret = -EBUSY;
  683. goto out;
  684. }
  685. if (!blk_queue_no_elv_switch(q)) {
  686. ret = elevator_change(q, &ctx);
  687. if (!ret)
  688. ret = count;
  689. } else {
  690. ret = -ENOENT;
  691. }
  692. up_read(&set->update_nr_hwq_lock);
  693. out:
  694. if (ctx.type)
  695. elevator_put(ctx.type);
  696. return ret;
  697. }
  698. ssize_t elv_iosched_show(struct gendisk *disk, char *name)
  699. {
  700. struct request_queue *q = disk->queue;
  701. struct elevator_type *cur = NULL, *e;
  702. int len = 0;
  703. mutex_lock(&q->elevator_lock);
  704. if (!q->elevator) {
  705. len += sprintf(name+len, "[none] ");
  706. } else {
  707. len += sprintf(name+len, "none ");
  708. cur = q->elevator->type;
  709. }
  710. spin_lock(&elv_list_lock);
  711. list_for_each_entry(e, &elv_list, list) {
  712. if (e == cur)
  713. len += sprintf(name+len, "[%s] ", e->elevator_name);
  714. else
  715. len += sprintf(name+len, "%s ", e->elevator_name);
  716. }
  717. spin_unlock(&elv_list_lock);
  718. len += sprintf(name+len, "\n");
  719. mutex_unlock(&q->elevator_lock);
  720. return len;
  721. }
  722. struct request *elv_rb_former_request(struct request_queue *q,
  723. struct request *rq)
  724. {
  725. struct rb_node *rbprev = rb_prev(&rq->rb_node);
  726. if (rbprev)
  727. return rb_entry_rq(rbprev);
  728. return NULL;
  729. }
  730. EXPORT_SYMBOL(elv_rb_former_request);
  731. struct request *elv_rb_latter_request(struct request_queue *q,
  732. struct request *rq)
  733. {
  734. struct rb_node *rbnext = rb_next(&rq->rb_node);
  735. if (rbnext)
  736. return rb_entry_rq(rbnext);
  737. return NULL;
  738. }
  739. EXPORT_SYMBOL(elv_rb_latter_request);
  740. static int __init elevator_setup(char *str)
  741. {
  742. pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
  743. "Please use sysfs to set IO scheduler for individual devices.\n");
  744. return 1;
  745. }
  746. __setup("elevator=", elevator_setup);