blk-sysfs.c 28 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Functions related to sysfs handling
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/slab.h>
  7. #include <linux/module.h>
  8. #include <linux/bio.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/backing-dev.h>
  11. #include <linux/blktrace_api.h>
  12. #include <linux/debugfs.h>
  13. #include "blk.h"
  14. #include "blk-mq.h"
  15. #include "blk-mq-debugfs.h"
  16. #include "blk-mq-sched.h"
  17. #include "blk-rq-qos.h"
  18. #include "blk-wbt.h"
  19. #include "blk-cgroup.h"
  20. #include "blk-throttle.h"
  21. struct queue_sysfs_entry {
  22. struct attribute attr;
  23. ssize_t (*show)(struct gendisk *disk, char *page);
  24. ssize_t (*show_limit)(struct gendisk *disk, char *page);
  25. ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
  26. int (*store_limit)(struct gendisk *disk, const char *page,
  27. size_t count, struct queue_limits *lim);
  28. };
  29. static ssize_t
  30. queue_var_show(unsigned long var, char *page)
  31. {
  32. return sysfs_emit(page, "%lu\n", var);
  33. }
  34. static ssize_t
  35. queue_var_store(unsigned long *var, const char *page, size_t count)
  36. {
  37. int err;
  38. unsigned long v;
  39. err = kstrtoul(page, 10, &v);
  40. if (err || v > UINT_MAX)
  41. return -EINVAL;
  42. *var = v;
  43. return count;
  44. }
  45. static ssize_t queue_requests_show(struct gendisk *disk, char *page)
  46. {
  47. ssize_t ret;
  48. mutex_lock(&disk->queue->elevator_lock);
  49. ret = queue_var_show(disk->queue->nr_requests, page);
  50. mutex_unlock(&disk->queue->elevator_lock);
  51. return ret;
  52. }
  53. static ssize_t
  54. queue_requests_store(struct gendisk *disk, const char *page, size_t count)
  55. {
  56. struct request_queue *q = disk->queue;
  57. struct blk_mq_tag_set *set = q->tag_set;
  58. struct elevator_tags *et = NULL;
  59. unsigned int memflags;
  60. unsigned long nr;
  61. int ret;
  62. ret = queue_var_store(&nr, page, count);
  63. if (ret < 0)
  64. return ret;
  65. /*
  66. * Serialize updating nr_requests with concurrent queue_requests_store()
  67. * and switching elevator.
  68. *
  69. * Use trylock to avoid circular lock dependency with kernfs active
  70. * reference during concurrent disk deletion:
  71. * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
  72. * kn->active -> update_nr_hwq_lock (via this sysfs write path)
  73. */
  74. if (!down_write_trylock(&set->update_nr_hwq_lock))
  75. return -EBUSY;
  76. if (nr == q->nr_requests)
  77. goto unlock;
  78. if (nr < BLKDEV_MIN_RQ)
  79. nr = BLKDEV_MIN_RQ;
  80. /*
  81. * Switching elevator is protected by update_nr_hwq_lock:
  82. * - read lock is held from elevator sysfs attribute;
  83. * - write lock is held from updating nr_hw_queues;
  84. * Hence it's safe to access q->elevator here with write lock held.
  85. */
  86. if (nr <= set->reserved_tags ||
  87. (q->elevator && nr > MAX_SCHED_RQ) ||
  88. (!q->elevator && nr > set->queue_depth)) {
  89. ret = -EINVAL;
  90. goto unlock;
  91. }
  92. if (!blk_mq_is_shared_tags(set->flags) && q->elevator &&
  93. nr > q->elevator->et->nr_requests) {
  94. /*
  95. * Tags will grow, allocate memory before freezing queue to
  96. * prevent deadlock.
  97. */
  98. et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr);
  99. if (!et) {
  100. ret = -ENOMEM;
  101. goto unlock;
  102. }
  103. }
  104. memflags = blk_mq_freeze_queue(q);
  105. mutex_lock(&q->elevator_lock);
  106. et = blk_mq_update_nr_requests(q, et, nr);
  107. mutex_unlock(&q->elevator_lock);
  108. blk_mq_unfreeze_queue(q, memflags);
  109. if (et)
  110. blk_mq_free_sched_tags(et, set);
  111. unlock:
  112. up_write(&set->update_nr_hwq_lock);
  113. return ret;
  114. }
  115. static ssize_t queue_async_depth_show(struct gendisk *disk, char *page)
  116. {
  117. guard(mutex)(&disk->queue->elevator_lock);
  118. return queue_var_show(disk->queue->async_depth, page);
  119. }
  120. static ssize_t
  121. queue_async_depth_store(struct gendisk *disk, const char *page, size_t count)
  122. {
  123. struct request_queue *q = disk->queue;
  124. unsigned int memflags;
  125. unsigned long nr;
  126. int ret;
  127. if (!queue_is_mq(q))
  128. return -EINVAL;
  129. ret = queue_var_store(&nr, page, count);
  130. if (ret < 0)
  131. return ret;
  132. if (nr == 0)
  133. return -EINVAL;
  134. memflags = blk_mq_freeze_queue(q);
  135. scoped_guard(mutex, &q->elevator_lock) {
  136. if (q->elevator) {
  137. q->async_depth = min(q->nr_requests, nr);
  138. if (q->elevator->type->ops.depth_updated)
  139. q->elevator->type->ops.depth_updated(q);
  140. } else {
  141. ret = -EINVAL;
  142. }
  143. }
  144. blk_mq_unfreeze_queue(q, memflags);
  145. return ret;
  146. }
  147. static ssize_t queue_ra_show(struct gendisk *disk, char *page)
  148. {
  149. ssize_t ret;
  150. mutex_lock(&disk->queue->limits_lock);
  151. ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
  152. mutex_unlock(&disk->queue->limits_lock);
  153. return ret;
  154. }
  155. static ssize_t
  156. queue_ra_store(struct gendisk *disk, const char *page, size_t count)
  157. {
  158. unsigned long ra_kb;
  159. ssize_t ret;
  160. struct request_queue *q = disk->queue;
  161. ret = queue_var_store(&ra_kb, page, count);
  162. if (ret < 0)
  163. return ret;
  164. /*
  165. * The ->ra_pages change below is protected by ->limits_lock because it
  166. * is usually calculated from the queue limits by
  167. * queue_limits_commit_update().
  168. *
  169. * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
  170. * Use WRITE_ONCE() to write bdi->ra_pages once.
  171. */
  172. mutex_lock(&q->limits_lock);
  173. WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
  174. mutex_unlock(&q->limits_lock);
  175. return ret;
  176. }
  177. #define QUEUE_SYSFS_LIMIT_SHOW(_field) \
  178. static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
  179. { \
  180. return queue_var_show(disk->queue->limits._field, page); \
  181. }
  182. QUEUE_SYSFS_LIMIT_SHOW(max_segments)
  183. QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
  184. QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
  185. QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
  186. QUEUE_SYSFS_LIMIT_SHOW(max_write_streams)
  187. QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity)
  188. QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
  189. QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
  190. QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
  191. QUEUE_SYSFS_LIMIT_SHOW(io_min)
  192. QUEUE_SYSFS_LIMIT_SHOW(io_opt)
  193. QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
  194. QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
  195. QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
  196. QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
  197. QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
  198. QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
  199. QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
  200. QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
  201. #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \
  202. static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
  203. { \
  204. return sysfs_emit(page, "%llu\n", \
  205. (unsigned long long)disk->queue->limits._field << \
  206. SECTOR_SHIFT); \
  207. }
  208. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
  209. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
  210. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
  211. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors)
  212. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors)
  213. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
  214. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
  215. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)
  216. #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
  217. static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
  218. { \
  219. return queue_var_show(disk->queue->limits._field >> 1, page); \
  220. }
  221. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
  222. QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
  223. #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
  224. static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
  225. { \
  226. return sysfs_emit(page, "%d\n", _val); \
  227. }
  228. /* deprecated fields */
  229. QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
  230. QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
  231. QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
  232. static int queue_max_discard_sectors_store(struct gendisk *disk,
  233. const char *page, size_t count, struct queue_limits *lim)
  234. {
  235. unsigned long max_discard_bytes;
  236. ssize_t ret;
  237. ret = queue_var_store(&max_discard_bytes, page, count);
  238. if (ret < 0)
  239. return ret;
  240. if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
  241. return -EINVAL;
  242. if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
  243. return -EINVAL;
  244. lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
  245. return 0;
  246. }
  247. static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk,
  248. const char *page, size_t count, struct queue_limits *lim)
  249. {
  250. unsigned long max_zeroes_bytes, max_hw_zeroes_bytes;
  251. ssize_t ret;
  252. ret = queue_var_store(&max_zeroes_bytes, page, count);
  253. if (ret < 0)
  254. return ret;
  255. max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT;
  256. if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes)
  257. return -EINVAL;
  258. lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT;
  259. return 0;
  260. }
  261. static int
  262. queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
  263. struct queue_limits *lim)
  264. {
  265. unsigned long max_sectors_kb;
  266. ssize_t ret;
  267. ret = queue_var_store(&max_sectors_kb, page, count);
  268. if (ret < 0)
  269. return ret;
  270. lim->max_user_sectors = max_sectors_kb << 1;
  271. return 0;
  272. }
  273. static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
  274. size_t count, struct queue_limits *lim, blk_features_t feature)
  275. {
  276. unsigned long val;
  277. ssize_t ret;
  278. ret = queue_var_store(&val, page, count);
  279. if (ret < 0)
  280. return ret;
  281. if (val)
  282. lim->features |= feature;
  283. else
  284. lim->features &= ~feature;
  285. return 0;
  286. }
  287. #define QUEUE_SYSFS_FEATURE(_name, _feature) \
  288. static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
  289. { \
  290. return sysfs_emit(page, "%u\n", \
  291. !!(disk->queue->limits.features & _feature)); \
  292. } \
  293. static int queue_##_name##_store(struct gendisk *disk, \
  294. const char *page, size_t count, struct queue_limits *lim) \
  295. { \
  296. return queue_feature_store(disk, page, count, lim, _feature); \
  297. }
  298. QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
  299. QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
  300. QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
  301. QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
  302. #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \
  303. static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
  304. { \
  305. return sysfs_emit(page, "%u\n", \
  306. !!(disk->queue->limits.features & _feature)); \
  307. }
  308. QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
  309. QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
  310. static ssize_t queue_poll_show(struct gendisk *disk, char *page)
  311. {
  312. if (queue_is_mq(disk->queue))
  313. return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
  314. return sysfs_emit(page, "%u\n",
  315. !!(disk->queue->limits.features & BLK_FEAT_POLL));
  316. }
  317. static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
  318. {
  319. if (blk_queue_is_zoned(disk->queue))
  320. return sysfs_emit(page, "host-managed\n");
  321. return sysfs_emit(page, "none\n");
  322. }
  323. static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
  324. {
  325. return queue_var_show(disk_nr_zones(disk), page);
  326. }
  327. static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
  328. {
  329. return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
  330. }
  331. static int queue_iostats_passthrough_store(struct gendisk *disk,
  332. const char *page, size_t count, struct queue_limits *lim)
  333. {
  334. unsigned long ios;
  335. ssize_t ret;
  336. ret = queue_var_store(&ios, page, count);
  337. if (ret < 0)
  338. return ret;
  339. if (ios)
  340. lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
  341. else
  342. lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
  343. return 0;
  344. }
  345. static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
  346. {
  347. return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
  348. blk_queue_noxmerges(disk->queue), page);
  349. }
  350. static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
  351. size_t count)
  352. {
  353. unsigned long nm;
  354. struct request_queue *q = disk->queue;
  355. ssize_t ret = queue_var_store(&nm, page, count);
  356. if (ret < 0)
  357. return ret;
  358. blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
  359. blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
  360. if (nm == 2)
  361. blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
  362. else if (nm)
  363. blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
  364. return ret;
  365. }
  366. static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
  367. {
  368. bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
  369. bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
  370. return queue_var_show(set << force, page);
  371. }
  372. static ssize_t
  373. queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
  374. {
  375. ssize_t ret = -EINVAL;
  376. #ifdef CONFIG_SMP
  377. struct request_queue *q = disk->queue;
  378. unsigned long val;
  379. ret = queue_var_store(&val, page, count);
  380. if (ret < 0)
  381. return ret;
  382. /*
  383. * Here we update two queue flags each using atomic bitops, although
  384. * updating two flags isn't atomic it should be harmless as those flags
  385. * are accessed individually using atomic test_bit operation. So we
  386. * don't grab any lock while updating these flags.
  387. */
  388. if (val == 2) {
  389. blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
  390. blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
  391. } else if (val == 1) {
  392. blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
  393. blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
  394. } else if (val == 0) {
  395. blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
  396. blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
  397. }
  398. #endif
  399. return ret;
  400. }
  401. static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
  402. size_t count)
  403. {
  404. return count;
  405. }
  406. static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
  407. size_t count)
  408. {
  409. ssize_t ret = count;
  410. struct request_queue *q = disk->queue;
  411. if (!(q->limits.features & BLK_FEAT_POLL)) {
  412. ret = -EINVAL;
  413. goto out;
  414. }
  415. pr_info_ratelimited("writes to the poll attribute are ignored.\n");
  416. pr_info_ratelimited("please use driver specific parameters instead.\n");
  417. out:
  418. return ret;
  419. }
  420. static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
  421. {
  422. return sysfs_emit(page, "%u\n",
  423. jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
  424. }
  425. static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
  426. size_t count)
  427. {
  428. unsigned int val;
  429. int err;
  430. struct request_queue *q = disk->queue;
  431. err = kstrtou32(page, 10, &val);
  432. if (err || val == 0)
  433. return -EINVAL;
  434. blk_queue_rq_timeout(q, msecs_to_jiffies(val));
  435. return count;
  436. }
  437. static ssize_t queue_wc_show(struct gendisk *disk, char *page)
  438. {
  439. if (blk_queue_write_cache(disk->queue))
  440. return sysfs_emit(page, "write back\n");
  441. return sysfs_emit(page, "write through\n");
  442. }
  443. static int queue_wc_store(struct gendisk *disk, const char *page,
  444. size_t count, struct queue_limits *lim)
  445. {
  446. bool disable;
  447. if (!strncmp(page, "write back", 10)) {
  448. disable = false;
  449. } else if (!strncmp(page, "write through", 13) ||
  450. !strncmp(page, "none", 4)) {
  451. disable = true;
  452. } else {
  453. return -EINVAL;
  454. }
  455. if (disable)
  456. lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
  457. else
  458. lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
  459. return 0;
  460. }
  461. #define QUEUE_RO_ENTRY(_prefix, _name) \
  462. static struct queue_sysfs_entry _prefix##_entry = { \
  463. .attr = { .name = _name, .mode = 0444 }, \
  464. .show = _prefix##_show, \
  465. };
  466. #define QUEUE_RW_ENTRY(_prefix, _name) \
  467. static struct queue_sysfs_entry _prefix##_entry = { \
  468. .attr = { .name = _name, .mode = 0644 }, \
  469. .show = _prefix##_show, \
  470. .store = _prefix##_store, \
  471. };
  472. #define QUEUE_LIM_RO_ENTRY(_prefix, _name) \
  473. static struct queue_sysfs_entry _prefix##_entry = { \
  474. .attr = { .name = _name, .mode = 0444 }, \
  475. .show_limit = _prefix##_show, \
  476. }
  477. #define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
  478. static struct queue_sysfs_entry _prefix##_entry = { \
  479. .attr = { .name = _name, .mode = 0644 }, \
  480. .show_limit = _prefix##_show, \
  481. .store_limit = _prefix##_store, \
  482. }
  483. QUEUE_RW_ENTRY(queue_requests, "nr_requests");
  484. QUEUE_RW_ENTRY(queue_async_depth, "async_depth");
  485. QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
  486. QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
  487. QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
  488. QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
  489. QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
  490. QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
  491. QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams");
  492. QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity");
  493. QUEUE_RW_ENTRY(elv_iosched, "scheduler");
  494. QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
  495. QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
  496. QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
  497. QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
  498. QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");
  499. QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
  500. QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
  501. QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
  502. QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
  503. QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
  504. QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
  505. QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
  506. "atomic_write_boundary_bytes");
  507. QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
  508. QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
  509. QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
  510. QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
  511. QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors,
  512. "write_zeroes_unmap_max_hw_bytes");
  513. QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors,
  514. "write_zeroes_unmap_max_bytes");
  515. QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
  516. QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
  517. QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
  518. QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
  519. QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
  520. QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
  521. QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
  522. QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
  523. QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
  524. QUEUE_RW_ENTRY(queue_poll, "io_poll");
  525. QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
  526. QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
  527. QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
  528. QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
  529. QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
  530. QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
  531. QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");
  532. /* legacy alias for logical_block_size: */
  533. static struct queue_sysfs_entry queue_hw_sector_size_entry = {
  534. .attr = {.name = "hw_sector_size", .mode = 0444 },
  535. .show_limit = queue_logical_block_size_show,
  536. };
  537. QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
  538. QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
  539. QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
  540. QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");
  541. #ifdef CONFIG_BLK_WBT
  542. static ssize_t queue_var_store64(s64 *var, const char *page)
  543. {
  544. int err;
  545. s64 v;
  546. err = kstrtos64(page, 10, &v);
  547. if (err < 0)
  548. return err;
  549. *var = v;
  550. return 0;
  551. }
  552. static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
  553. {
  554. ssize_t ret;
  555. struct request_queue *q = disk->queue;
  556. mutex_lock(&disk->rqos_state_mutex);
  557. if (!wbt_rq_qos(q)) {
  558. ret = -EINVAL;
  559. goto out;
  560. }
  561. if (wbt_disabled(q)) {
  562. ret = sysfs_emit(page, "0\n");
  563. goto out;
  564. }
  565. ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
  566. out:
  567. mutex_unlock(&disk->rqos_state_mutex);
  568. return ret;
  569. }
  570. static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
  571. size_t count)
  572. {
  573. ssize_t ret;
  574. s64 val;
  575. ret = queue_var_store64(&val, page);
  576. if (ret < 0)
  577. return ret;
  578. if (val < -1)
  579. return -EINVAL;
  580. ret = wbt_set_lat(disk, val);
  581. return ret ? ret : count;
  582. }
  583. QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
  584. #endif
  585. /* Common attributes for bio-based and request-based queues. */
  586. static struct attribute *queue_attrs[] = {
  587. /*
  588. * Attributes which are protected with q->limits_lock.
  589. */
  590. &queue_max_hw_sectors_entry.attr,
  591. &queue_max_sectors_entry.attr,
  592. &queue_max_segments_entry.attr,
  593. &queue_max_discard_segments_entry.attr,
  594. &queue_max_integrity_segments_entry.attr,
  595. &queue_max_segment_size_entry.attr,
  596. &queue_max_write_streams_entry.attr,
  597. &queue_write_stream_granularity_entry.attr,
  598. &queue_hw_sector_size_entry.attr,
  599. &queue_logical_block_size_entry.attr,
  600. &queue_physical_block_size_entry.attr,
  601. &queue_chunk_sectors_entry.attr,
  602. &queue_io_min_entry.attr,
  603. &queue_io_opt_entry.attr,
  604. &queue_discard_granularity_entry.attr,
  605. &queue_max_discard_sectors_entry.attr,
  606. &queue_max_hw_discard_sectors_entry.attr,
  607. &queue_atomic_write_max_sectors_entry.attr,
  608. &queue_atomic_write_boundary_sectors_entry.attr,
  609. &queue_atomic_write_unit_min_entry.attr,
  610. &queue_atomic_write_unit_max_entry.attr,
  611. &queue_max_write_zeroes_sectors_entry.attr,
  612. &queue_max_hw_wzeroes_unmap_sectors_entry.attr,
  613. &queue_max_wzeroes_unmap_sectors_entry.attr,
  614. &queue_max_zone_append_sectors_entry.attr,
  615. &queue_zone_write_granularity_entry.attr,
  616. &queue_rotational_entry.attr,
  617. &queue_zoned_entry.attr,
  618. &queue_max_open_zones_entry.attr,
  619. &queue_max_active_zones_entry.attr,
  620. &queue_iostats_passthrough_entry.attr,
  621. &queue_iostats_entry.attr,
  622. &queue_stable_writes_entry.attr,
  623. &queue_add_random_entry.attr,
  624. &queue_wc_entry.attr,
  625. &queue_fua_entry.attr,
  626. &queue_dax_entry.attr,
  627. &queue_virt_boundary_mask_entry.attr,
  628. &queue_dma_alignment_entry.attr,
  629. &queue_ra_entry.attr,
  630. /*
  631. * Attributes which don't require locking.
  632. */
  633. &queue_discard_zeroes_data_entry.attr,
  634. &queue_write_same_max_entry.attr,
  635. &queue_nr_zones_entry.attr,
  636. &queue_nomerges_entry.attr,
  637. &queue_poll_entry.attr,
  638. &queue_poll_delay_entry.attr,
  639. NULL,
  640. };
  641. /* Request-based queue attributes that are not relevant for bio-based queues. */
  642. static struct attribute *blk_mq_queue_attrs[] = {
  643. /*
  644. * Attributes which require some form of locking other than
  645. * q->sysfs_lock.
  646. */
  647. &elv_iosched_entry.attr,
  648. &queue_requests_entry.attr,
  649. &queue_async_depth_entry.attr,
  650. #ifdef CONFIG_BLK_WBT
  651. &queue_wb_lat_entry.attr,
  652. #endif
  653. /*
  654. * Attributes which don't require locking.
  655. */
  656. &queue_rq_affinity_entry.attr,
  657. &queue_io_timeout_entry.attr,
  658. NULL,
  659. };
  660. static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
  661. int n)
  662. {
  663. struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
  664. struct request_queue *q = disk->queue;
  665. if ((attr == &queue_max_open_zones_entry.attr ||
  666. attr == &queue_max_active_zones_entry.attr) &&
  667. !blk_queue_is_zoned(q))
  668. return 0;
  669. return attr->mode;
  670. }
  671. static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
  672. struct attribute *attr, int n)
  673. {
  674. struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
  675. struct request_queue *q = disk->queue;
  676. if (!queue_is_mq(q))
  677. return 0;
  678. if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
  679. return 0;
  680. return attr->mode;
  681. }
  682. static struct attribute_group queue_attr_group = {
  683. .attrs = queue_attrs,
  684. .is_visible = queue_attr_visible,
  685. };
  686. static struct attribute_group blk_mq_queue_attr_group = {
  687. .attrs = blk_mq_queue_attrs,
  688. .is_visible = blk_mq_queue_attr_visible,
  689. };
  690. #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
  691. static ssize_t
  692. queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
  693. {
  694. struct queue_sysfs_entry *entry = to_queue(attr);
  695. struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
  696. if (!entry->show && !entry->show_limit)
  697. return -EIO;
  698. if (entry->show_limit) {
  699. ssize_t res;
  700. mutex_lock(&disk->queue->limits_lock);
  701. res = entry->show_limit(disk, page);
  702. mutex_unlock(&disk->queue->limits_lock);
  703. return res;
  704. }
  705. return entry->show(disk, page);
  706. }
  707. static ssize_t
  708. queue_attr_store(struct kobject *kobj, struct attribute *attr,
  709. const char *page, size_t length)
  710. {
  711. struct queue_sysfs_entry *entry = to_queue(attr);
  712. struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
  713. struct request_queue *q = disk->queue;
  714. if (!entry->store_limit && !entry->store)
  715. return -EIO;
  716. if (entry->store_limit) {
  717. ssize_t res;
  718. struct queue_limits lim = queue_limits_start_update(q);
  719. res = entry->store_limit(disk, page, length, &lim);
  720. if (res < 0) {
  721. queue_limits_cancel_update(q);
  722. return res;
  723. }
  724. res = queue_limits_commit_update_frozen(q, &lim);
  725. if (res)
  726. return res;
  727. return length;
  728. }
  729. return entry->store(disk, page, length);
  730. }
  731. static const struct sysfs_ops queue_sysfs_ops = {
  732. .show = queue_attr_show,
  733. .store = queue_attr_store,
  734. };
  735. static const struct attribute_group *blk_queue_attr_groups[] = {
  736. &queue_attr_group,
  737. &blk_mq_queue_attr_group,
  738. NULL
  739. };
  740. static void blk_queue_release(struct kobject *kobj)
  741. {
  742. /* nothing to do here, all data is associated with the parent gendisk */
  743. }
  744. const struct kobj_type blk_queue_ktype = {
  745. .default_groups = blk_queue_attr_groups,
  746. .sysfs_ops = &queue_sysfs_ops,
  747. .release = blk_queue_release,
  748. };
  749. static void blk_debugfs_remove(struct gendisk *disk)
  750. {
  751. struct request_queue *q = disk->queue;
  752. blk_debugfs_lock_nomemsave(q);
  753. blk_trace_shutdown(q);
  754. debugfs_remove_recursive(q->debugfs_dir);
  755. q->debugfs_dir = NULL;
  756. q->sched_debugfs_dir = NULL;
  757. q->rqos_debugfs_dir = NULL;
  758. blk_debugfs_unlock_nomemrestore(q);
  759. }
  760. /**
  761. * blk_register_queue - register a block layer queue with sysfs
  762. * @disk: Disk of which the request queue should be registered with sysfs.
  763. */
  764. int blk_register_queue(struct gendisk *disk)
  765. {
  766. struct request_queue *q = disk->queue;
  767. unsigned int memflags;
  768. int ret;
  769. ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
  770. if (ret < 0)
  771. return ret;
  772. if (queue_is_mq(q)) {
  773. ret = blk_mq_sysfs_register(disk);
  774. if (ret)
  775. goto out_del_queue_kobj;
  776. }
  777. mutex_lock(&q->sysfs_lock);
  778. memflags = blk_debugfs_lock(q);
  779. q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
  780. if (queue_is_mq(q))
  781. blk_mq_debugfs_register(q);
  782. blk_debugfs_unlock(q, memflags);
  783. ret = disk_register_independent_access_ranges(disk);
  784. if (ret)
  785. goto out_debugfs_remove;
  786. ret = blk_crypto_sysfs_register(disk);
  787. if (ret)
  788. goto out_unregister_ia_ranges;
  789. if (queue_is_mq(q))
  790. elevator_set_default(q);
  791. blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
  792. wbt_init_enable_default(disk);
  793. /* Now everything is ready and send out KOBJ_ADD uevent */
  794. kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
  795. if (q->elevator)
  796. kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
  797. mutex_unlock(&q->sysfs_lock);
  798. /*
  799. * SCSI probing may synchronously create and destroy a lot of
  800. * request_queues for non-existent devices. Shutting down a fully
  801. * functional queue takes measureable wallclock time as RCU grace
  802. * periods are involved. To avoid excessive latency in these
  803. * cases, a request_queue starts out in a degraded mode which is
  804. * faster to shut down and is made fully functional here as
  805. * request_queues for non-existent devices never get registered.
  806. */
  807. blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
  808. percpu_ref_switch_to_percpu(&q->q_usage_counter);
  809. return ret;
  810. out_unregister_ia_ranges:
  811. disk_unregister_independent_access_ranges(disk);
  812. out_debugfs_remove:
  813. blk_debugfs_remove(disk);
  814. mutex_unlock(&q->sysfs_lock);
  815. if (queue_is_mq(q))
  816. blk_mq_sysfs_unregister(disk);
  817. out_del_queue_kobj:
  818. kobject_del(&disk->queue_kobj);
  819. return ret;
  820. }
  821. /**
  822. * blk_unregister_queue - counterpart of blk_register_queue()
  823. * @disk: Disk of which the request queue should be unregistered from sysfs.
  824. *
  825. * Note: the caller is responsible for guaranteeing that this function is called
  826. * after blk_register_queue() has finished.
  827. */
  828. void blk_unregister_queue(struct gendisk *disk)
  829. {
  830. struct request_queue *q = disk->queue;
  831. if (WARN_ON(!q))
  832. return;
  833. /* Return early if disk->queue was never registered. */
  834. if (!blk_queue_registered(q))
  835. return;
  836. /*
  837. * Since sysfs_remove_dir() prevents adding new directory entries
  838. * before removal of existing entries starts, protect against
  839. * concurrent elv_iosched_store() calls.
  840. */
  841. mutex_lock(&q->sysfs_lock);
  842. blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
  843. mutex_unlock(&q->sysfs_lock);
  844. /*
  845. * Remove the sysfs attributes before unregistering the queue data
  846. * structures that can be modified through sysfs.
  847. */
  848. if (queue_is_mq(q))
  849. blk_mq_sysfs_unregister(disk);
  850. blk_crypto_sysfs_unregister(disk);
  851. mutex_lock(&q->sysfs_lock);
  852. disk_unregister_independent_access_ranges(disk);
  853. mutex_unlock(&q->sysfs_lock);
  854. /* Now that we've deleted all child objects, we can delete the queue. */
  855. kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
  856. kobject_del(&disk->queue_kobj);
  857. if (queue_is_mq(q))
  858. elevator_set_none(q);
  859. blk_debugfs_remove(disk);
  860. }