blk-zoned.c 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Zoned block device handling
  4. *
  5. * Copyright (c) 2015, Hannes Reinecke
  6. * Copyright (c) 2015, SUSE Linux GmbH
  7. *
  8. * Copyright (c) 2016, Damien Le Moal
  9. * Copyright (c) 2016, Western Digital
  10. * Copyright (c) 2024, Western Digital Corporation or its affiliates.
  11. */
  12. #include <linux/kernel.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/blk-mq.h>
  15. #include <linux/spinlock.h>
  16. #include <linux/refcount.h>
  17. #include <linux/mempool.h>
  18. #include <trace/events/block.h>
  19. #include "blk.h"
  20. #include "blk-mq-sched.h"
  21. #include "blk-mq-debugfs.h"
  22. #define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
  23. static const char *const zone_cond_name[] = {
  24. ZONE_COND_NAME(NOT_WP),
  25. ZONE_COND_NAME(EMPTY),
  26. ZONE_COND_NAME(IMP_OPEN),
  27. ZONE_COND_NAME(EXP_OPEN),
  28. ZONE_COND_NAME(CLOSED),
  29. ZONE_COND_NAME(READONLY),
  30. ZONE_COND_NAME(FULL),
  31. ZONE_COND_NAME(OFFLINE),
  32. ZONE_COND_NAME(ACTIVE),
  33. };
  34. #undef ZONE_COND_NAME
  35. /*
  36. * Per-zone write plug.
  37. * @node: hlist_node structure for managing the plug using a hash table.
  38. * @bio_list: The list of BIOs that are currently plugged.
  39. * @bio_work: Work struct to handle issuing of plugged BIOs
  40. * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
  41. * @disk: The gendisk the plug belongs to.
  42. * @lock: Spinlock to atomically manipulate the plug.
  43. * @ref: Zone write plug reference counter. A zone write plug reference is
  44. * always at least 1 when the plug is hashed in the disk plug hash table.
  45. * The reference is incremented whenever a new BIO needing plugging is
  46. * submitted and when a function needs to manipulate a plug. The
  47. * reference count is decremented whenever a plugged BIO completes and
  48. * when a function that referenced the plug returns. The initial
  49. * reference is dropped whenever the zone of the zone write plug is reset,
  50. * finished and when the zone becomes full (last write BIO to the zone
  51. * completes).
  52. * @flags: Flags indicating the plug state.
  53. * @zone_no: The number of the zone the plug is managing.
  54. * @wp_offset: The zone write pointer location relative to the start of the zone
  55. * as a number of 512B sectors.
  56. * @cond: Condition of the zone
  57. */
  58. struct blk_zone_wplug {
  59. struct hlist_node node;
  60. struct bio_list bio_list;
  61. struct work_struct bio_work;
  62. struct rcu_head rcu_head;
  63. struct gendisk *disk;
  64. spinlock_t lock;
  65. refcount_t ref;
  66. unsigned int flags;
  67. unsigned int zone_no;
  68. unsigned int wp_offset;
  69. enum blk_zone_cond cond;
  70. };
  71. static inline bool disk_need_zone_resources(struct gendisk *disk)
  72. {
  73. /*
  74. * All request-based zoned devices need zone resources so that the
  75. * block layer can automatically handle write BIO plugging. BIO-based
  76. * device drivers (e.g. DM devices) are normally responsible for
  77. * handling zone write ordering and do not need zone resources, unless
  78. * the driver requires zone append emulation.
  79. */
  80. return queue_is_mq(disk->queue) ||
  81. queue_emulates_zone_append(disk->queue);
  82. }
  83. static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
  84. {
  85. return 1U << disk->zone_wplugs_hash_bits;
  86. }
  /*
   * Zone write plug flag bits, stored in the flags field of
   * struct blk_zone_wplug.
   */

  /*
   * The zone write plug is plugged: write BIOs are being throttled because a
   * write BIO is already being executed or because the plug BIO list is not
   * empty.
   */
  #define BLK_ZONE_WPLUG_PLUGGED		(1U << 0)

  /*
   * Track of the zone write pointer offset was lost and the offset needs to be
   * updated.
   */
  #define BLK_ZONE_WPLUG_NEED_WP_UPDATE	(1U << 1)

  /*
   * The zone write plug was removed from the disk hash table and the initial
   * reference set when the plug was first hashed has been dropped. This flag
   * is set when a zone is reset, finished or becomes full, to prevent newly
   * incoming BIOs from taking new references on the plug. A plug with this
   * flag set is freed once all remaining references from BIOs or functions
   * are dropped.
   */
  #define BLK_ZONE_WPLUG_UNHASHED		(1U << 2)
  105. /**
  106. * blk_zone_cond_str - Return a zone condition name string
  107. * @zone_cond: a zone condition BLK_ZONE_COND_name
  108. *
  109. * Convert a BLK_ZONE_COND_name zone condition into the string "name". Useful
  110. * for the debugging and tracing zone conditions. For an invalid zone
  111. * conditions, the string "UNKNOWN" is returned.
  112. */
  113. const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
  114. {
  115. static const char *zone_cond_str = "UNKNOWN";
  116. if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
  117. zone_cond_str = zone_cond_name[zone_cond];
  118. return zone_cond_str;
  119. }
  120. EXPORT_SYMBOL_GPL(blk_zone_cond_str);
  121. static void blk_zone_set_cond(u8 *zones_cond, unsigned int zno,
  122. enum blk_zone_cond cond)
  123. {
  124. if (!zones_cond)
  125. return;
  126. switch (cond) {
  127. case BLK_ZONE_COND_IMP_OPEN:
  128. case BLK_ZONE_COND_EXP_OPEN:
  129. case BLK_ZONE_COND_CLOSED:
  130. zones_cond[zno] = BLK_ZONE_COND_ACTIVE;
  131. return;
  132. case BLK_ZONE_COND_NOT_WP:
  133. case BLK_ZONE_COND_EMPTY:
  134. case BLK_ZONE_COND_FULL:
  135. case BLK_ZONE_COND_OFFLINE:
  136. case BLK_ZONE_COND_READONLY:
  137. default:
  138. zones_cond[zno] = cond;
  139. return;
  140. }
  141. }
  142. static void disk_zone_set_cond(struct gendisk *disk, sector_t sector,
  143. enum blk_zone_cond cond)
  144. {
  145. u8 *zones_cond;
  146. rcu_read_lock();
  147. zones_cond = rcu_dereference(disk->zones_cond);
  148. if (zones_cond) {
  149. unsigned int zno = disk_zone_no(disk, sector);
  150. /*
  151. * The condition of a conventional, readonly and offline zones
  152. * never changes, so do nothing if the target zone is in one of
  153. * these conditions.
  154. */
  155. switch (zones_cond[zno]) {
  156. case BLK_ZONE_COND_NOT_WP:
  157. case BLK_ZONE_COND_READONLY:
  158. case BLK_ZONE_COND_OFFLINE:
  159. break;
  160. default:
  161. blk_zone_set_cond(zones_cond, zno, cond);
  162. break;
  163. }
  164. }
  165. rcu_read_unlock();
  166. }
  167. /**
  168. * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
  169. * @bdev: block device to check
  170. * @sector: sector number
  171. *
  172. * Check if @sector on @bdev is contained in a sequential write required zone.
  173. */
  174. bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
  175. {
  176. struct gendisk *disk = bdev->bd_disk;
  177. unsigned int zno = disk_zone_no(disk, sector);
  178. bool is_seq = false;
  179. u8 *zones_cond;
  180. if (!bdev_is_zoned(bdev))
  181. return false;
  182. rcu_read_lock();
  183. zones_cond = rcu_dereference(disk->zones_cond);
  184. if (zones_cond && zno < disk->nr_zones)
  185. is_seq = zones_cond[zno] != BLK_ZONE_COND_NOT_WP;
  186. rcu_read_unlock();
  187. return is_seq;
  188. }
  189. EXPORT_SYMBOL_GPL(bdev_zone_is_seq);
  190. /*
  191. * Zone report arguments for block device drivers report_zones operation.
  192. * @cb: report_zones_cb callback for each reported zone.
  193. * @data: Private data passed to report_zones_cb.
  194. */
  195. struct blk_report_zones_args {
  196. report_zones_cb cb;
  197. void *data;
  198. bool report_active;
  199. };
  200. static int blkdev_do_report_zones(struct block_device *bdev, sector_t sector,
  201. unsigned int nr_zones,
  202. struct blk_report_zones_args *args)
  203. {
  204. struct gendisk *disk = bdev->bd_disk;
  205. if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
  206. return -EOPNOTSUPP;
  207. if (!nr_zones || sector >= get_capacity(disk))
  208. return 0;
  209. return disk->fops->report_zones(disk, sector, nr_zones, args);
  210. }
  211. /**
  212. * blkdev_report_zones - Get zones information
  213. * @bdev: Target block device
  214. * @sector: Sector from which to report zones
  215. * @nr_zones: Maximum number of zones to report
  216. * @cb: Callback function called for each reported zone
  217. * @data: Private data for the callback
  218. *
  219. * Description:
  220. * Get zone information starting from the zone containing @sector for at most
  221. * @nr_zones, and call @cb for each zone reported by the device.
  222. * To report all zones in a device starting from @sector, the BLK_ALL_ZONES
  223. * constant can be passed to @nr_zones.
  224. * Returns the number of zones reported by the device, or a negative errno
  225. * value in case of failure.
  226. *
  227. * Note: The caller must use memalloc_noXX_save/restore() calls to control
  228. * memory allocations done within this function.
  229. */
  230. int blkdev_report_zones(struct block_device *bdev, sector_t sector,
  231. unsigned int nr_zones, report_zones_cb cb, void *data)
  232. {
  233. struct blk_report_zones_args args = {
  234. .cb = cb,
  235. .data = data,
  236. };
  237. return blkdev_do_report_zones(bdev, sector, nr_zones, &args);
  238. }
  239. EXPORT_SYMBOL_GPL(blkdev_report_zones);
  240. static int blkdev_zone_reset_all(struct block_device *bdev)
  241. {
  242. struct bio bio;
  243. bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
  244. trace_blkdev_zone_mgmt(&bio, 0);
  245. return submit_bio_wait(&bio);
  246. }
  247. /**
  248. * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
  249. * @bdev: Target block device
  250. * @op: Operation to be performed on the zones
  251. * @sector: Start sector of the first zone to operate on
  252. * @nr_sectors: Number of sectors, should be at least the length of one zone and
  253. * must be zone size aligned.
  254. *
  255. * Description:
  256. * Perform the specified operation on the range of zones specified by
  257. * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
  258. * is valid, but the specified range should not contain conventional zones.
  259. * The operation to execute on each zone can be a zone reset, open, close
  260. * or finish request.
  261. */
  262. int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
  263. sector_t sector, sector_t nr_sectors)
  264. {
  265. sector_t zone_sectors = bdev_zone_sectors(bdev);
  266. sector_t capacity = bdev_nr_sectors(bdev);
  267. sector_t end_sector = sector + nr_sectors;
  268. struct bio *bio = NULL;
  269. int ret = 0;
  270. if (!bdev_is_zoned(bdev))
  271. return -EOPNOTSUPP;
  272. if (bdev_read_only(bdev))
  273. return -EPERM;
  274. if (!op_is_zone_mgmt(op))
  275. return -EOPNOTSUPP;
  276. if (end_sector <= sector || end_sector > capacity)
  277. /* Out of range */
  278. return -EINVAL;
  279. /* Check alignment (handle eventual smaller last zone) */
  280. if (!bdev_is_zone_start(bdev, sector))
  281. return -EINVAL;
  282. if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
  283. return -EINVAL;
  284. /*
  285. * In the case of a zone reset operation over all zones, use
  286. * REQ_OP_ZONE_RESET_ALL.
  287. */
  288. if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
  289. return blkdev_zone_reset_all(bdev);
  290. while (sector < end_sector) {
  291. bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
  292. bio->bi_iter.bi_sector = sector;
  293. sector += zone_sectors;
  294. /* This may take a while, so be nice to others */
  295. cond_resched();
  296. }
  297. trace_blkdev_zone_mgmt(bio, nr_sectors);
  298. ret = submit_bio_wait(bio);
  299. bio_put(bio);
  300. return ret;
  301. }
  302. EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
  303. struct zone_report_args {
  304. struct blk_zone __user *zones;
  305. };
  306. static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
  307. void *data)
  308. {
  309. struct zone_report_args *args = data;
  310. if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
  311. return -EFAULT;
  312. return 0;
  313. }
  /*
   * Flags that user space is allowed to set in the flags field of the report
   * passed to the BLKREPORTZONEV2 ioctl.
   */
  #define BLK_ZONE_REPV2_INPUT_FLAGS	(BLK_ZONE_REP_CACHED)
  318. /*
  319. * BLKREPORTZONE and BLKREPORTZONEV2 ioctl processing.
  320. * Called from blkdev_ioctl.
  321. */
  322. int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
  323. unsigned long arg)
  324. {
  325. void __user *argp = (void __user *)arg;
  326. struct zone_report_args args;
  327. struct blk_zone_report rep;
  328. int ret;
  329. if (!argp)
  330. return -EINVAL;
  331. if (!bdev_is_zoned(bdev))
  332. return -ENOTTY;
  333. if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
  334. return -EFAULT;
  335. if (!rep.nr_zones)
  336. return -EINVAL;
  337. args.zones = argp + sizeof(struct blk_zone_report);
  338. switch (cmd) {
  339. case BLKREPORTZONE:
  340. ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
  341. blkdev_copy_zone_to_user, &args);
  342. break;
  343. case BLKREPORTZONEV2:
  344. if (rep.flags & ~BLK_ZONE_REPV2_INPUT_FLAGS)
  345. return -EINVAL;
  346. ret = blkdev_report_zones_cached(bdev, rep.sector, rep.nr_zones,
  347. blkdev_copy_zone_to_user, &args);
  348. break;
  349. default:
  350. return -EINVAL;
  351. }
  352. if (ret < 0)
  353. return ret;
  354. rep.nr_zones = ret;
  355. rep.flags = BLK_ZONE_REP_CAPACITY;
  356. if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
  357. return -EFAULT;
  358. return 0;
  359. }
  360. static int blkdev_truncate_zone_range(struct block_device *bdev,
  361. blk_mode_t mode, const struct blk_zone_range *zrange)
  362. {
  363. loff_t start, end;
  364. if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
  365. zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
  366. /* Out of range */
  367. return -EINVAL;
  368. start = zrange->sector << SECTOR_SHIFT;
  369. end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;
  370. return truncate_bdev_range(bdev, mode, start, end);
  371. }
  372. /*
  373. * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
  374. * Called from blkdev_ioctl.
  375. */
  376. int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
  377. unsigned int cmd, unsigned long arg)
  378. {
  379. void __user *argp = (void __user *)arg;
  380. struct blk_zone_range zrange;
  381. enum req_op op;
  382. int ret;
  383. if (!argp)
  384. return -EINVAL;
  385. if (!bdev_is_zoned(bdev))
  386. return -ENOTTY;
  387. if (!(mode & BLK_OPEN_WRITE))
  388. return -EBADF;
  389. if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
  390. return -EFAULT;
  391. switch (cmd) {
  392. case BLKRESETZONE:
  393. op = REQ_OP_ZONE_RESET;
  394. /* Invalidate the page cache, including dirty pages. */
  395. inode_lock(bdev->bd_mapping->host);
  396. filemap_invalidate_lock(bdev->bd_mapping);
  397. ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
  398. if (ret)
  399. goto fail;
  400. break;
  401. case BLKOPENZONE:
  402. op = REQ_OP_ZONE_OPEN;
  403. break;
  404. case BLKCLOSEZONE:
  405. op = REQ_OP_ZONE_CLOSE;
  406. break;
  407. case BLKFINISHZONE:
  408. op = REQ_OP_ZONE_FINISH;
  409. break;
  410. default:
  411. return -ENOTTY;
  412. }
  413. ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
  414. fail:
  415. if (cmd == BLKRESETZONE) {
  416. filemap_invalidate_unlock(bdev->bd_mapping);
  417. inode_unlock(bdev->bd_mapping->host);
  418. }
  419. return ret;
  420. }
  421. static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
  422. {
  423. return zone->start + zone->len >= get_capacity(disk);
  424. }
  425. static bool disk_zone_is_full(struct gendisk *disk,
  426. unsigned int zno, unsigned int offset_in_zone)
  427. {
  428. if (zno < disk->nr_zones - 1)
  429. return offset_in_zone >= disk->zone_capacity;
  430. return offset_in_zone >= disk->last_zone_capacity;
  431. }
  432. static bool disk_zone_wplug_is_full(struct gendisk *disk,
  433. struct blk_zone_wplug *zwplug)
  434. {
  435. return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
  436. }
  437. static bool disk_insert_zone_wplug(struct gendisk *disk,
  438. struct blk_zone_wplug *zwplug)
  439. {
  440. struct blk_zone_wplug *zwplg;
  441. unsigned long flags;
  442. u8 *zones_cond;
  443. unsigned int idx =
  444. hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits);
  445. /*
  446. * Add the new zone write plug to the hash table, but carefully as we
  447. * are racing with other submission context, so we may already have a
  448. * zone write plug for the same zone.
  449. */
  450. spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
  451. hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
  452. if (zwplg->zone_no == zwplug->zone_no) {
  453. spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
  454. return false;
  455. }
  456. }
  457. /*
  458. * Set the zone condition: if we do not yet have a zones_cond array
  459. * attached to the disk, then this is a zone write plug insert from the
  460. * first call to blk_revalidate_disk_zones(), in which case the zone is
  461. * necessarilly in the active condition.
  462. */
  463. zones_cond = rcu_dereference_check(disk->zones_cond,
  464. lockdep_is_held(&disk->zone_wplugs_lock));
  465. if (zones_cond)
  466. zwplug->cond = zones_cond[zwplug->zone_no];
  467. else
  468. zwplug->cond = BLK_ZONE_COND_ACTIVE;
  469. hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
  470. atomic_inc(&disk->nr_zone_wplugs);
  471. spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
  472. return true;
  473. }
  474. static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
  475. sector_t sector)
  476. {
  477. unsigned int zno = disk_zone_no(disk, sector);
  478. unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
  479. struct blk_zone_wplug *zwplug;
  480. rcu_read_lock();
  481. hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) {
  482. if (zwplug->zone_no == zno &&
  483. refcount_inc_not_zero(&zwplug->ref)) {
  484. rcu_read_unlock();
  485. return zwplug;
  486. }
  487. }
  488. rcu_read_unlock();
  489. return NULL;
  490. }
  491. static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
  492. sector_t sector)
  493. {
  494. if (!atomic_read(&disk->nr_zone_wplugs))
  495. return NULL;
  496. return disk_get_hashed_zone_wplug(disk, sector);
  497. }
  498. static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
  499. {
  500. struct blk_zone_wplug *zwplug =
  501. container_of(rcu_head, struct blk_zone_wplug, rcu_head);
  502. mempool_free(zwplug, zwplug->disk->zone_wplugs_pool);
  503. }
  504. static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
  505. {
  506. if (refcount_dec_and_test(&zwplug->ref)) {
  507. WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
  508. WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
  509. WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
  510. call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
  511. }
  512. }
  513. static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
  514. struct blk_zone_wplug *zwplug)
  515. {
  516. lockdep_assert_held(&zwplug->lock);
  517. /* If the zone write plug was already removed, we are done. */
  518. if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
  519. return false;
  520. /* If the zone write plug is still plugged, it cannot be removed. */
  521. if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
  522. return false;
  523. /*
  524. * Completions of BIOs with blk_zone_write_plug_bio_endio() may
  525. * happen after handling a request completion with
  526. * blk_zone_write_plug_finish_request() (e.g. with split BIOs
  527. * that are chained). In such case, disk_zone_wplug_unplug_bio()
  528. * should not attempt to remove the zone write plug until all BIO
  529. * completions are seen. Check by looking at the zone write plug
  530. * reference count, which is 2 when the plug is unused (one reference
  531. * taken when the plug was allocated and another reference taken by the
  532. * caller context).
  533. */
  534. if (refcount_read(&zwplug->ref) > 2)
  535. return false;
  536. /* We can remove zone write plugs for zones that are empty or full. */
  537. return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
  538. }
  539. static void disk_remove_zone_wplug(struct gendisk *disk,
  540. struct blk_zone_wplug *zwplug)
  541. {
  542. unsigned long flags;
  543. /* If the zone write plug was already removed, we have nothing to do. */
  544. if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
  545. return;
  546. /*
  547. * Mark the zone write plug as unhashed and drop the extra reference we
  548. * took when the plug was inserted in the hash table. Also update the
  549. * disk zone condition array with the current condition of the zone
  550. * write plug.
  551. */
  552. zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
  553. spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
  554. blk_zone_set_cond(rcu_dereference_check(disk->zones_cond,
  555. lockdep_is_held(&disk->zone_wplugs_lock)),
  556. zwplug->zone_no, zwplug->cond);
  557. hlist_del_init_rcu(&zwplug->node);
  558. atomic_dec(&disk->nr_zone_wplugs);
  559. spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
  560. disk_put_zone_wplug(zwplug);
  561. }
  562. static void blk_zone_wplug_bio_work(struct work_struct *work);
  563. /*
  564. * Get a reference on the write plug for the zone containing @sector.
  565. * If the plug does not exist, it is allocated and hashed.
  566. * Return a pointer to the zone write plug with the plug spinlock held.
  567. */
  568. static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
  569. sector_t sector, gfp_t gfp_mask,
  570. unsigned long *flags)
  571. {
  572. unsigned int zno = disk_zone_no(disk, sector);
  573. struct blk_zone_wplug *zwplug;
  574. again:
  575. zwplug = disk_get_zone_wplug(disk, sector);
  576. if (zwplug) {
  577. /*
  578. * Check that a BIO completion or a zone reset or finish
  579. * operation has not already removed the zone write plug from
  580. * the hash table and dropped its reference count. In such case,
  581. * we need to get a new plug so start over from the beginning.
  582. */
  583. spin_lock_irqsave(&zwplug->lock, *flags);
  584. if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
  585. spin_unlock_irqrestore(&zwplug->lock, *flags);
  586. disk_put_zone_wplug(zwplug);
  587. goto again;
  588. }
  589. return zwplug;
  590. }
  591. /*
  592. * Allocate and initialize a zone write plug with an extra reference
  593. * so that it is not freed when the zone write plug becomes idle without
  594. * the zone being full.
  595. */
  596. zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
  597. if (!zwplug)
  598. return NULL;
  599. INIT_HLIST_NODE(&zwplug->node);
  600. refcount_set(&zwplug->ref, 2);
  601. spin_lock_init(&zwplug->lock);
  602. zwplug->flags = 0;
  603. zwplug->zone_no = zno;
  604. zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
  605. bio_list_init(&zwplug->bio_list);
  606. INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
  607. zwplug->disk = disk;
  608. spin_lock_irqsave(&zwplug->lock, *flags);
  609. /*
  610. * Insert the new zone write plug in the hash table. This can fail only
  611. * if another context already inserted a plug. Retry from the beginning
  612. * in such case.
  613. */
  614. if (!disk_insert_zone_wplug(disk, zwplug)) {
  615. spin_unlock_irqrestore(&zwplug->lock, *flags);
  616. mempool_free(zwplug, disk->zone_wplugs_pool);
  617. goto again;
  618. }
  619. return zwplug;
  620. }
  621. static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
  622. struct bio *bio)
  623. {
  624. struct request_queue *q = zwplug->disk->queue;
  625. bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
  626. bio_io_error(bio);
  627. disk_put_zone_wplug(zwplug);
  628. /* Drop the reference taken by disk_zone_wplug_add_bio(). */
  629. blk_queue_exit(q);
  630. }
  631. /*
  632. * Abort (fail) all plugged BIOs of a zone write plug.
  633. */
  634. static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
  635. {
  636. struct bio *bio;
  637. lockdep_assert_held(&zwplug->lock);
  638. if (bio_list_empty(&zwplug->bio_list))
  639. return;
  640. pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
  641. zwplug->disk->disk_name, zwplug->zone_no);
  642. while ((bio = bio_list_pop(&zwplug->bio_list)))
  643. blk_zone_wplug_bio_io_error(zwplug, bio);
  644. zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
  645. }
  646. /*
  647. * Update a zone write plug condition based on the write pointer offset.
  648. */
  649. static void disk_zone_wplug_update_cond(struct gendisk *disk,
  650. struct blk_zone_wplug *zwplug)
  651. {
  652. lockdep_assert_held(&zwplug->lock);
  653. if (disk_zone_wplug_is_full(disk, zwplug))
  654. zwplug->cond = BLK_ZONE_COND_FULL;
  655. else if (!zwplug->wp_offset)
  656. zwplug->cond = BLK_ZONE_COND_EMPTY;
  657. else
  658. zwplug->cond = BLK_ZONE_COND_ACTIVE;
  659. }
  660. /*
  661. * Set a zone write plug write pointer offset to the specified value.
  662. * This aborts all plugged BIOs, which is fine as this function is called for
  663. * a zone reset operation, a zone finish operation or if the zone needs a wp
  664. * update from a report zone after a write error.
  665. */
  666. static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
  667. struct blk_zone_wplug *zwplug,
  668. unsigned int wp_offset)
  669. {
  670. lockdep_assert_held(&zwplug->lock);
  671. /* Update the zone write pointer and abort all plugged BIOs. */
  672. zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE;
  673. zwplug->wp_offset = wp_offset;
  674. disk_zone_wplug_update_cond(disk, zwplug);
  675. disk_zone_wplug_abort(zwplug);
  676. /*
  677. * The zone write plug now has no BIO plugged: remove it from the
  678. * hash table so that it cannot be seen. The plug will be freed
  679. * when the last reference is dropped.
  680. */
  681. if (disk_should_remove_zone_wplug(disk, zwplug))
  682. disk_remove_zone_wplug(disk, zwplug);
  683. }
  684. static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
  685. {
  686. switch (zone->cond) {
  687. case BLK_ZONE_COND_IMP_OPEN:
  688. case BLK_ZONE_COND_EXP_OPEN:
  689. case BLK_ZONE_COND_CLOSED:
  690. case BLK_ZONE_COND_ACTIVE:
  691. return zone->wp - zone->start;
  692. case BLK_ZONE_COND_EMPTY:
  693. return 0;
  694. case BLK_ZONE_COND_FULL:
  695. case BLK_ZONE_COND_NOT_WP:
  696. case BLK_ZONE_COND_OFFLINE:
  697. case BLK_ZONE_COND_READONLY:
  698. default:
  699. /*
  700. * Conventional, full, offline and read-only zones do not have
  701. * a valid write pointer.
  702. */
  703. return UINT_MAX;
  704. }
  705. }
  706. static unsigned int disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
  707. struct blk_zone *zone)
  708. {
  709. struct blk_zone_wplug *zwplug;
  710. unsigned int wp_offset = blk_zone_wp_offset(zone);
  711. zwplug = disk_get_zone_wplug(disk, zone->start);
  712. if (zwplug) {
  713. unsigned long flags;
  714. spin_lock_irqsave(&zwplug->lock, flags);
  715. if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
  716. disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
  717. spin_unlock_irqrestore(&zwplug->lock, flags);
  718. disk_put_zone_wplug(zwplug);
  719. }
  720. return wp_offset;
  721. }
  722. /**
  723. * disk_report_zone - Report one zone
  724. * @disk: Target disk
  725. * @zone: The zone to report
  726. * @idx: The index of the zone in the overall zone report
  727. * @args: report zones callback and data
  728. *
  729. * Description:
  730. * Helper function for block device drivers to report one zone of a zone
  731. * report initiated with blkdev_report_zones(). The zone being reported is
  732. * specified by @zone and used to update, if necessary, the zone write plug
  733. * information for the zone. If @args specifies a user callback function,
  734. * this callback is executed.
  735. */
  736. int disk_report_zone(struct gendisk *disk, struct blk_zone *zone,
  737. unsigned int idx, struct blk_report_zones_args *args)
  738. {
  739. if (args && args->report_active) {
  740. /*
  741. * If we come here, then this is a report zones as a fallback
  742. * for a cached report. So collapse the implicit open, explicit
  743. * open and closed conditions into the active zone condition.
  744. */
  745. switch (zone->cond) {
  746. case BLK_ZONE_COND_IMP_OPEN:
  747. case BLK_ZONE_COND_EXP_OPEN:
  748. case BLK_ZONE_COND_CLOSED:
  749. zone->cond = BLK_ZONE_COND_ACTIVE;
  750. break;
  751. default:
  752. break;
  753. }
  754. }
  755. if (disk->zone_wplugs_hash)
  756. disk_zone_wplug_sync_wp_offset(disk, zone);
  757. if (args && args->cb)
  758. return args->cb(zone, idx, args->data);
  759. return 0;
  760. }
  761. EXPORT_SYMBOL_GPL(disk_report_zone);
  762. static int blkdev_report_zone_cb(struct blk_zone *zone, unsigned int idx,
  763. void *data)
  764. {
  765. memcpy(data, zone, sizeof(struct blk_zone));
  766. return 0;
  767. }
  768. static int blkdev_report_zone_fallback(struct block_device *bdev,
  769. sector_t sector, struct blk_zone *zone)
  770. {
  771. struct blk_report_zones_args args = {
  772. .cb = blkdev_report_zone_cb,
  773. .data = zone,
  774. .report_active = true,
  775. };
  776. int error;
  777. error = blkdev_do_report_zones(bdev, sector, 1, &args);
  778. if (error < 0)
  779. return error;
  780. if (error == 0)
  781. return -EIO;
  782. return 0;
  783. }
  784. /*
  785. * For devices that natively support zone append operations, we do not use zone
  786. * write plugging for zone append writes, which makes the zone condition
  787. * tracking invalid once zone append was used. In that case fall back to a
  788. * regular report zones to get correct information.
  789. */
  790. static inline bool blkdev_has_cached_report_zones(struct block_device *bdev)
  791. {
  792. return disk_need_zone_resources(bdev->bd_disk) &&
  793. (bdev_emulates_zone_append(bdev) ||
  794. !test_bit(GD_ZONE_APPEND_USED, &bdev->bd_disk->state));
  795. }
  796. /**
  797. * blkdev_get_zone_info - Get a single zone information from cached data
  798. * @bdev: Target block device
  799. * @sector: Sector contained by the target zone
  800. * @zone: zone structure to return the zone information
  801. *
  802. * Description:
  803. * Get the zone information for the zone containing @sector using the zone
  804. * write plug of the target zone, if one exist, or the disk zone condition
  805. * array otherwise. The zone condition may be reported as being
  806. * the BLK_ZONE_COND_ACTIVE condition for a zone that is in the implicit
  807. * open, explicit open or closed condition.
  808. *
  809. * Returns 0 on success and a negative error code on failure.
  810. */
  811. int blkdev_get_zone_info(struct block_device *bdev, sector_t sector,
  812. struct blk_zone *zone)
  813. {
  814. struct gendisk *disk = bdev->bd_disk;
  815. sector_t zone_sectors = bdev_zone_sectors(bdev);
  816. struct blk_zone_wplug *zwplug;
  817. unsigned long flags;
  818. u8 *zones_cond;
  819. if (!bdev_is_zoned(bdev))
  820. return -EOPNOTSUPP;
  821. if (sector >= get_capacity(disk))
  822. return -EINVAL;
  823. memset(zone, 0, sizeof(*zone));
  824. sector = bdev_zone_start(bdev, sector);
  825. if (!blkdev_has_cached_report_zones(bdev))
  826. return blkdev_report_zone_fallback(bdev, sector, zone);
  827. rcu_read_lock();
  828. zones_cond = rcu_dereference(disk->zones_cond);
  829. if (!disk->zone_wplugs_hash || !zones_cond) {
  830. rcu_read_unlock();
  831. return blkdev_report_zone_fallback(bdev, sector, zone);
  832. }
  833. zone->cond = zones_cond[disk_zone_no(disk, sector)];
  834. rcu_read_unlock();
  835. zone->start = sector;
  836. zone->len = zone_sectors;
  837. /*
  838. * If this is a conventional zone, we do not have a zone write plug and
  839. * can report the zone immediately.
  840. */
  841. if (zone->cond == BLK_ZONE_COND_NOT_WP) {
  842. zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
  843. zone->capacity = zone_sectors;
  844. zone->wp = ULLONG_MAX;
  845. return 0;
  846. }
  847. /*
  848. * This is a sequential write required zone. If the zone is read-only or
  849. * offline, only set the zone write pointer to an invalid value and
  850. * report the zone.
  851. */
  852. zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
  853. if (disk_zone_is_last(disk, zone))
  854. zone->capacity = disk->last_zone_capacity;
  855. else
  856. zone->capacity = disk->zone_capacity;
  857. if (zone->cond == BLK_ZONE_COND_READONLY ||
  858. zone->cond == BLK_ZONE_COND_OFFLINE) {
  859. zone->wp = ULLONG_MAX;
  860. return 0;
  861. }
  862. /*
  863. * If the zone does not have a zone write plug, it is either full or
  864. * empty, as we otherwise would have a zone write plug for it. In this
  865. * case, set the write pointer accordingly and report the zone.
  866. * Otherwise, if we have a zone write plug, use it.
  867. */
  868. zwplug = disk_get_zone_wplug(disk, sector);
  869. if (!zwplug) {
  870. if (zone->cond == BLK_ZONE_COND_FULL)
  871. zone->wp = ULLONG_MAX;
  872. else
  873. zone->wp = sector;
  874. return 0;
  875. }
  876. spin_lock_irqsave(&zwplug->lock, flags);
  877. if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) {
  878. spin_unlock_irqrestore(&zwplug->lock, flags);
  879. disk_put_zone_wplug(zwplug);
  880. return blkdev_report_zone_fallback(bdev, sector, zone);
  881. }
  882. zone->cond = zwplug->cond;
  883. zone->wp = sector + zwplug->wp_offset;
  884. spin_unlock_irqrestore(&zwplug->lock, flags);
  885. disk_put_zone_wplug(zwplug);
  886. return 0;
  887. }
  888. EXPORT_SYMBOL_GPL(blkdev_get_zone_info);
  889. /**
  890. * blkdev_report_zones_cached - Get cached zones information
  891. * @bdev: Target block device
  892. * @sector: Sector from which to report zones
  893. * @nr_zones: Maximum number of zones to report
  894. * @cb: Callback function called for each reported zone
  895. * @data: Private data for the callback function
  896. *
  897. * Description:
  898. * Similar to blkdev_report_zones() but instead of calling into the low level
  899. * device driver to get the zone report from the device, use
  900. * blkdev_get_zone_info() to generate the report from the disk zone write
  901. * plugs and zones condition array. Since calling this function without a
  902. * callback does not make sense, @cb must be specified.
  903. */
  904. int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector,
  905. unsigned int nr_zones, report_zones_cb cb, void *data)
  906. {
  907. struct gendisk *disk = bdev->bd_disk;
  908. sector_t capacity = get_capacity(disk);
  909. sector_t zone_sectors = bdev_zone_sectors(bdev);
  910. unsigned int idx = 0;
  911. struct blk_zone zone;
  912. int ret;
  913. if (!cb || !bdev_is_zoned(bdev) ||
  914. WARN_ON_ONCE(!disk->fops->report_zones))
  915. return -EOPNOTSUPP;
  916. if (!nr_zones || sector >= capacity)
  917. return 0;
  918. if (!blkdev_has_cached_report_zones(bdev)) {
  919. struct blk_report_zones_args args = {
  920. .cb = cb,
  921. .data = data,
  922. .report_active = true,
  923. };
  924. return blkdev_do_report_zones(bdev, sector, nr_zones, &args);
  925. }
  926. for (sector = bdev_zone_start(bdev, sector);
  927. sector < capacity && idx < nr_zones;
  928. sector += zone_sectors, idx++) {
  929. ret = blkdev_get_zone_info(bdev, sector, &zone);
  930. if (ret)
  931. return ret;
  932. ret = cb(&zone, idx, data);
  933. if (ret)
  934. return ret;
  935. }
  936. return idx;
  937. }
  938. EXPORT_SYMBOL_GPL(blkdev_report_zones_cached);
  939. static void blk_zone_reset_bio_endio(struct bio *bio)
  940. {
  941. struct gendisk *disk = bio->bi_bdev->bd_disk;
  942. sector_t sector = bio->bi_iter.bi_sector;
  943. struct blk_zone_wplug *zwplug;
  944. /*
  945. * If we have a zone write plug, set its write pointer offset to 0.
  946. * This will abort all BIOs plugged for the target zone. It is fine as
  947. * resetting zones while writes are still in-flight will result in the
  948. * writes failing anyway.
  949. */
  950. zwplug = disk_get_zone_wplug(disk, sector);
  951. if (zwplug) {
  952. unsigned long flags;
  953. spin_lock_irqsave(&zwplug->lock, flags);
  954. disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
  955. spin_unlock_irqrestore(&zwplug->lock, flags);
  956. disk_put_zone_wplug(zwplug);
  957. } else {
  958. disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY);
  959. }
  960. }
  961. static void blk_zone_reset_all_bio_endio(struct bio *bio)
  962. {
  963. struct gendisk *disk = bio->bi_bdev->bd_disk;
  964. sector_t capacity = get_capacity(disk);
  965. struct blk_zone_wplug *zwplug;
  966. unsigned long flags;
  967. sector_t sector;
  968. unsigned int i;
  969. if (atomic_read(&disk->nr_zone_wplugs)) {
  970. /* Update the condition of all zone write plugs. */
  971. rcu_read_lock();
  972. for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
  973. hlist_for_each_entry_rcu(zwplug,
  974. &disk->zone_wplugs_hash[i],
  975. node) {
  976. spin_lock_irqsave(&zwplug->lock, flags);
  977. disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
  978. spin_unlock_irqrestore(&zwplug->lock, flags);
  979. }
  980. }
  981. rcu_read_unlock();
  982. }
  983. /* Update the cached zone conditions. */
  984. for (sector = 0; sector < capacity;
  985. sector += bdev_zone_sectors(bio->bi_bdev))
  986. disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY);
  987. clear_bit(GD_ZONE_APPEND_USED, &disk->state);
  988. }
  989. static void blk_zone_finish_bio_endio(struct bio *bio)
  990. {
  991. struct block_device *bdev = bio->bi_bdev;
  992. struct gendisk *disk = bdev->bd_disk;
  993. sector_t sector = bio->bi_iter.bi_sector;
  994. struct blk_zone_wplug *zwplug;
  995. /*
  996. * If we have a zone write plug, set its write pointer offset to the
  997. * zone size. This will abort all BIOs plugged for the target zone. It
  998. * is fine as resetting zones while writes are still in-flight will
  999. * result in the writes failing anyway.
  1000. */
  1001. zwplug = disk_get_zone_wplug(disk, sector);
  1002. if (zwplug) {
  1003. unsigned long flags;
  1004. spin_lock_irqsave(&zwplug->lock, flags);
  1005. disk_zone_wplug_set_wp_offset(disk, zwplug,
  1006. bdev_zone_sectors(bdev));
  1007. spin_unlock_irqrestore(&zwplug->lock, flags);
  1008. disk_put_zone_wplug(zwplug);
  1009. } else {
  1010. disk_zone_set_cond(disk, sector, BLK_ZONE_COND_FULL);
  1011. }
  1012. }
  1013. void blk_zone_mgmt_bio_endio(struct bio *bio)
  1014. {
  1015. /* If the BIO failed, we have nothing to do. */
  1016. if (bio->bi_status != BLK_STS_OK)
  1017. return;
  1018. switch (bio_op(bio)) {
  1019. case REQ_OP_ZONE_RESET:
  1020. blk_zone_reset_bio_endio(bio);
  1021. return;
  1022. case REQ_OP_ZONE_RESET_ALL:
  1023. blk_zone_reset_all_bio_endio(bio);
  1024. return;
  1025. case REQ_OP_ZONE_FINISH:
  1026. blk_zone_finish_bio_endio(bio);
  1027. return;
  1028. default:
  1029. return;
  1030. }
  1031. }
  1032. static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
  1033. struct blk_zone_wplug *zwplug)
  1034. {
  1035. lockdep_assert_held(&zwplug->lock);
  1036. /*
  1037. * Take a reference on the zone write plug and schedule the submission
  1038. * of the next plugged BIO. blk_zone_wplug_bio_work() will release the
  1039. * reference we take here.
  1040. */
  1041. WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
  1042. refcount_inc(&zwplug->ref);
  1043. queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
  1044. }
  1045. static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
  1046. struct blk_zone_wplug *zwplug,
  1047. struct bio *bio, unsigned int nr_segs)
  1048. {
  1049. /*
  1050. * Grab an extra reference on the BIO request queue usage counter.
  1051. * This reference will be reused to submit a request for the BIO for
  1052. * blk-mq devices and dropped when the BIO is failed and after
  1053. * it is issued in the case of BIO-based devices.
  1054. */
  1055. percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter);
  1056. /*
  1057. * The BIO is being plugged and thus will have to wait for the on-going
  1058. * write and for all other writes already plugged. So polling makes
  1059. * no sense.
  1060. */
  1061. bio_clear_polled(bio);
  1062. /*
  1063. * Reuse the poll cookie field to store the number of segments when
  1064. * split to the hardware limits.
  1065. */
  1066. bio->__bi_nr_segments = nr_segs;
  1067. /*
  1068. * We always receive BIOs after they are split and ready to be issued.
  1069. * The block layer passes the parts of a split BIO in order, and the
  1070. * user must also issue write sequentially. So simply add the new BIO
  1071. * at the tail of the list to preserve the sequential write order.
  1072. */
  1073. bio_list_add(&zwplug->bio_list, bio);
  1074. trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
  1075. bio->bi_iter.bi_sector, bio_sectors(bio));
  1076. }
  1077. /*
  1078. * Called from bio_attempt_back_merge() when a BIO was merged with a request.
  1079. */
  1080. void blk_zone_write_plug_bio_merged(struct bio *bio)
  1081. {
  1082. struct gendisk *disk = bio->bi_bdev->bd_disk;
  1083. struct blk_zone_wplug *zwplug;
  1084. unsigned long flags;
  1085. /*
  1086. * If the BIO was already plugged, then we were called through
  1087. * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge().
  1088. * For this case, we already hold a reference on the zone write plug for
  1089. * the BIO and blk_zone_write_plug_init_request() will handle the
  1090. * zone write pointer offset update.
  1091. */
  1092. if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
  1093. return;
  1094. bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
  1095. /*
  1096. * Get a reference on the zone write plug of the target zone and advance
  1097. * the zone write pointer offset. Given that this is a merge, we already
  1098. * have at least one request and one BIO referencing the zone write
  1099. * plug. So this should not fail.
  1100. */
  1101. zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
  1102. if (WARN_ON_ONCE(!zwplug))
  1103. return;
  1104. spin_lock_irqsave(&zwplug->lock, flags);
  1105. zwplug->wp_offset += bio_sectors(bio);
  1106. disk_zone_wplug_update_cond(disk, zwplug);
  1107. spin_unlock_irqrestore(&zwplug->lock, flags);
  1108. }
  1109. /*
  1110. * Attempt to merge plugged BIOs with a newly prepared request for a BIO that
  1111. * already went through zone write plugging (either a new BIO or one that was
  1112. * unplugged).
  1113. */
  1114. void blk_zone_write_plug_init_request(struct request *req)
  1115. {
  1116. sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
  1117. struct request_queue *q = req->q;
  1118. struct gendisk *disk = q->disk;
  1119. struct blk_zone_wplug *zwplug =
  1120. disk_get_zone_wplug(disk, blk_rq_pos(req));
  1121. unsigned long flags;
  1122. struct bio *bio;
  1123. if (WARN_ON_ONCE(!zwplug))
  1124. return;
  1125. /*
  1126. * Indicate that completion of this request needs to be handled with
  1127. * blk_zone_write_plug_finish_request(), which will drop the reference
  1128. * on the zone write plug we took above on entry to this function.
  1129. */
  1130. req->rq_flags |= RQF_ZONE_WRITE_PLUGGING;
  1131. if (blk_queue_nomerges(q))
  1132. return;
  1133. /*
  1134. * Walk through the list of plugged BIOs to check if they can be merged
  1135. * into the back of the request.
  1136. */
  1137. spin_lock_irqsave(&zwplug->lock, flags);
  1138. while (!disk_zone_wplug_is_full(disk, zwplug)) {
  1139. bio = bio_list_peek(&zwplug->bio_list);
  1140. if (!bio)
  1141. break;
  1142. if (bio->bi_iter.bi_sector != req_back_sector ||
  1143. !blk_rq_merge_ok(req, bio))
  1144. break;
  1145. WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE_ZEROES &&
  1146. !bio->__bi_nr_segments);
  1147. bio_list_pop(&zwplug->bio_list);
  1148. if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) !=
  1149. BIO_MERGE_OK) {
  1150. bio_list_add_head(&zwplug->bio_list, bio);
  1151. break;
  1152. }
  1153. /* Drop the reference taken by disk_zone_wplug_add_bio(). */
  1154. blk_queue_exit(q);
  1155. zwplug->wp_offset += bio_sectors(bio);
  1156. disk_zone_wplug_update_cond(disk, zwplug);
  1157. req_back_sector += bio_sectors(bio);
  1158. }
  1159. spin_unlock_irqrestore(&zwplug->lock, flags);
  1160. }
  1161. /*
  1162. * Check and prepare a BIO for submission by incrementing the write pointer
  1163. * offset of its zone write plug and changing zone append operations into
  1164. * regular write when zone append emulation is needed.
  1165. */
  1166. static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
  1167. struct bio *bio)
  1168. {
  1169. struct gendisk *disk = bio->bi_bdev->bd_disk;
  1170. lockdep_assert_held(&zwplug->lock);
  1171. /*
  1172. * If we lost track of the zone write pointer due to a write error,
  1173. * the user must either execute a report zones, reset the zone or finish
  1174. * the to recover a reliable write pointer position. Fail BIOs if the
  1175. * user did not do that as we cannot handle emulated zone append
  1176. * otherwise.
  1177. */
  1178. if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
  1179. return false;
  1180. /*
  1181. * Check that the user is not attempting to write to a full zone.
  1182. * We know such BIO will fail, and that would potentially overflow our
  1183. * write pointer offset beyond the end of the zone.
  1184. */
  1185. if (disk_zone_wplug_is_full(disk, zwplug))
  1186. return false;
  1187. if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
  1188. /*
  1189. * Use a regular write starting at the current write pointer.
  1190. * Similarly to native zone append operations, do not allow
  1191. * merging.
  1192. */
  1193. bio->bi_opf &= ~REQ_OP_MASK;
  1194. bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
  1195. bio->bi_iter.bi_sector += zwplug->wp_offset;
  1196. /*
  1197. * Remember that this BIO is in fact a zone append operation
  1198. * so that we can restore its operation code on completion.
  1199. */
  1200. bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
  1201. } else {
  1202. /*
  1203. * Check for non-sequential writes early as we know that BIOs
  1204. * with a start sector not unaligned to the zone write pointer
  1205. * will fail.
  1206. */
  1207. if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
  1208. return false;
  1209. }
  1210. /* Advance the zone write pointer offset. */
  1211. zwplug->wp_offset += bio_sectors(bio);
  1212. disk_zone_wplug_update_cond(disk, zwplug);
  1213. return true;
  1214. }
  1215. static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
  1216. {
  1217. struct gendisk *disk = bio->bi_bdev->bd_disk;
  1218. sector_t sector = bio->bi_iter.bi_sector;
  1219. struct blk_zone_wplug *zwplug;
  1220. gfp_t gfp_mask = GFP_NOIO;
  1221. unsigned long flags;
  1222. /*
  1223. * BIOs must be fully contained within a zone so that we use the correct
  1224. * zone write plug for the entire BIO. For blk-mq devices, the block
  1225. * layer should already have done any splitting required to ensure this
  1226. * and this BIO should thus not be straddling zone boundaries. For
  1227. * BIO-based devices, it is the responsibility of the driver to split
  1228. * the bio before submitting it.
  1229. */
  1230. if (WARN_ON_ONCE(bio_straddles_zones(bio))) {
  1231. bio_io_error(bio);
  1232. return true;
  1233. }
  1234. /* Conventional zones do not need write plugging. */
  1235. if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
  1236. /* Zone append to conventional zones is not allowed. */
  1237. if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
  1238. bio_io_error(bio);
  1239. return true;
  1240. }
  1241. return false;
  1242. }
  1243. if (bio->bi_opf & REQ_NOWAIT)
  1244. gfp_mask = GFP_NOWAIT;
  1245. zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags);
  1246. if (!zwplug) {
  1247. if (bio->bi_opf & REQ_NOWAIT)
  1248. bio_wouldblock_error(bio);
  1249. else
  1250. bio_io_error(bio);
  1251. return true;
  1252. }
  1253. /* Indicate that this BIO is being handled using zone write plugging. */
  1254. bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
  1255. /*
  1256. * Add REQ_NOWAIT BIOs to the plug list to ensure that we will not see a
  1257. * BLK_STS_AGAIN failure if we let the caller submit the BIO.
  1258. */
  1259. if (bio->bi_opf & REQ_NOWAIT) {
  1260. bio->bi_opf &= ~REQ_NOWAIT;
  1261. goto queue_bio;
  1262. }
  1263. /* If the zone is already plugged, add the BIO to the BIO plug list. */
  1264. if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
  1265. goto queue_bio;
  1266. if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
  1267. spin_unlock_irqrestore(&zwplug->lock, flags);
  1268. bio_io_error(bio);
  1269. return true;
  1270. }
  1271. /* Otherwise, plug and let the caller submit the BIO. */
  1272. zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
  1273. spin_unlock_irqrestore(&zwplug->lock, flags);
  1274. return false;
  1275. queue_bio:
  1276. disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs);
  1277. if (!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)) {
  1278. zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
  1279. disk_zone_wplug_schedule_bio_work(disk, zwplug);
  1280. }
  1281. spin_unlock_irqrestore(&zwplug->lock, flags);
  1282. return true;
  1283. }
  1284. static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
  1285. {
  1286. struct gendisk *disk = bio->bi_bdev->bd_disk;
  1287. struct blk_zone_wplug *zwplug;
  1288. unsigned long flags;
  1289. if (!test_bit(GD_ZONE_APPEND_USED, &disk->state))
  1290. set_bit(GD_ZONE_APPEND_USED, &disk->state);
  1291. /*
  1292. * We have native support for zone append operations, so we are not
  1293. * going to handle @bio through plugging. However, we may already have a
  1294. * zone write plug for the target zone if that zone was previously
  1295. * partially written using regular writes. In such case, we risk leaving
  1296. * the plug in the disk hash table if the zone is fully written using
  1297. * zone append operations. Avoid this by removing the zone write plug.
  1298. */
  1299. zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
  1300. if (likely(!zwplug))
  1301. return;
  1302. spin_lock_irqsave(&zwplug->lock, flags);
  1303. /*
  1304. * We are about to remove the zone write plug. But if the user
  1305. * (mistakenly) has issued regular writes together with native zone
  1306. * append, we must aborts the writes as otherwise the plugged BIOs would
  1307. * not be executed by the plug BIO work as disk_get_zone_wplug() will
  1308. * return NULL after the plug is removed. Aborting the plugged write
  1309. * BIOs is consistent with the fact that these writes will most likely
  1310. * fail anyway as there is no ordering guarantees between zone append
  1311. * operations and regular write operations.
  1312. */
  1313. if (!bio_list_empty(&zwplug->bio_list)) {
  1314. pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
  1315. disk->disk_name, zwplug->zone_no);
  1316. disk_zone_wplug_abort(zwplug);
  1317. }
  1318. disk_remove_zone_wplug(disk, zwplug);
  1319. spin_unlock_irqrestore(&zwplug->lock, flags);
  1320. disk_put_zone_wplug(zwplug);
  1321. }
  1322. static bool blk_zone_wplug_handle_zone_mgmt(struct bio *bio)
  1323. {
  1324. if (bio_op(bio) != REQ_OP_ZONE_RESET_ALL &&
  1325. !bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
  1326. /*
  1327. * Zone reset and zone finish operations do not apply to
  1328. * conventional zones.
  1329. */
  1330. bio_io_error(bio);
  1331. return true;
  1332. }
  1333. /*
  1334. * No-wait zone management BIOs do not make much sense as the callers
  1335. * issue these as blocking operations in most cases. To avoid issues
  1336. * with the BIO execution potentially failing with BLK_STS_AGAIN, warn
  1337. * about REQ_NOWAIT being set and ignore that flag.
  1338. */
  1339. if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT))
  1340. bio->bi_opf &= ~REQ_NOWAIT;
  1341. return false;
  1342. }
  1343. /**
  1344. * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
  1345. * @bio: The BIO being submitted
  1346. * @nr_segs: The number of physical segments of @bio
  1347. *
  1348. * Handle write, write zeroes and zone append operations requiring emulation
  1349. * using zone write plugging.
  1350. *
  1351. * Return true whenever @bio execution needs to be delayed through the zone
  1352. * write plug. Otherwise, return false to let the submission path process
  1353. * @bio normally.
  1354. */
  1355. bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
  1356. {
  1357. struct block_device *bdev = bio->bi_bdev;
  1358. if (WARN_ON_ONCE(!bdev->bd_disk->zone_wplugs_hash))
  1359. return false;
  1360. /*
  1361. * Regular writes and write zeroes need to be handled through the target
  1362. * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH
  1363. * which may need to go through the flush machinery depending on the
  1364. * target device capabilities. Plugging such writes is fine as the flush
  1365. * machinery operates at the request level, below the plug, and
  1366. * completion of the flush sequence will go through the regular BIO
  1367. * completion, which will handle zone write plugging.
  1368. * Zone append operations for devices that requested emulation must
  1369. * also be plugged so that these BIOs can be changed into regular
  1370. * write BIOs.
  1371. * Zone reset, reset all and finish commands need special treatment
  1372. * to correctly track the write pointer offset of zones. These commands
  1373. * are not plugged as we do not need serialization with write
  1374. * operations. It is the responsibility of the user to not issue reset
  1375. * and finish commands when write operations are in flight.
  1376. */
  1377. switch (bio_op(bio)) {
  1378. case REQ_OP_ZONE_APPEND:
  1379. if (!bdev_emulates_zone_append(bdev)) {
  1380. blk_zone_wplug_handle_native_zone_append(bio);
  1381. return false;
  1382. }
  1383. fallthrough;
  1384. case REQ_OP_WRITE:
  1385. case REQ_OP_WRITE_ZEROES:
  1386. return blk_zone_wplug_handle_write(bio, nr_segs);
  1387. case REQ_OP_ZONE_RESET:
  1388. case REQ_OP_ZONE_FINISH:
  1389. case REQ_OP_ZONE_RESET_ALL:
  1390. return blk_zone_wplug_handle_zone_mgmt(bio);
  1391. default:
  1392. return false;
  1393. }
  1394. return false;
  1395. }
  1396. EXPORT_SYMBOL_GPL(blk_zone_plug_bio);
  1397. static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
  1398. struct blk_zone_wplug *zwplug)
  1399. {
  1400. unsigned long flags;
  1401. spin_lock_irqsave(&zwplug->lock, flags);
  1402. /* Schedule submission of the next plugged BIO if we have one. */
  1403. if (!bio_list_empty(&zwplug->bio_list)) {
  1404. disk_zone_wplug_schedule_bio_work(disk, zwplug);
  1405. spin_unlock_irqrestore(&zwplug->lock, flags);
  1406. return;
  1407. }
  1408. zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
  1409. /*
  1410. * If the zone is full (it was fully written or finished, or empty
  1411. * (it was reset), remove its zone write plug from the hash table.
  1412. */
  1413. if (disk_should_remove_zone_wplug(disk, zwplug))
  1414. disk_remove_zone_wplug(disk, zwplug);
  1415. spin_unlock_irqrestore(&zwplug->lock, flags);
  1416. }
  1417. void blk_zone_append_update_request_bio(struct request *rq, struct bio *bio)
  1418. {
  1419. /*
  1420. * For zone append requests, the request sector indicates the location
  1421. * at which the BIO data was written. Return this value to the BIO
  1422. * issuer through the BIO iter sector.
  1423. * For plugged zone writes, which include emulated zone append, we need
  1424. * the original BIO sector so that blk_zone_write_plug_bio_endio() can
  1425. * lookup the zone write plug.
  1426. */
  1427. bio->bi_iter.bi_sector = rq->__sector;
  1428. trace_blk_zone_append_update_request_bio(rq);
  1429. }
  1430. void blk_zone_write_plug_bio_endio(struct bio *bio)
  1431. {
  1432. struct gendisk *disk = bio->bi_bdev->bd_disk;
  1433. struct blk_zone_wplug *zwplug =
  1434. disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
  1435. unsigned long flags;
  1436. if (WARN_ON_ONCE(!zwplug))
  1437. return;
  1438. /* Make sure we do not see this BIO again by clearing the plug flag. */
  1439. bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
  1440. /*
  1441. * If this is a regular write emulating a zone append operation,
  1442. * restore the original operation code.
  1443. */
  1444. if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) {
  1445. bio->bi_opf &= ~REQ_OP_MASK;
  1446. bio->bi_opf |= REQ_OP_ZONE_APPEND;
  1447. bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND);
  1448. }
  1449. /*
  1450. * If the BIO failed, abort all plugged BIOs and mark the plug as
  1451. * needing a write pointer update.
  1452. */
  1453. if (bio->bi_status != BLK_STS_OK) {
  1454. spin_lock_irqsave(&zwplug->lock, flags);
  1455. disk_zone_wplug_abort(zwplug);
  1456. zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE;
  1457. spin_unlock_irqrestore(&zwplug->lock, flags);
  1458. }
  1459. /* Drop the reference we took when the BIO was issued. */
  1460. disk_put_zone_wplug(zwplug);
  1461. /*
  1462. * For BIO-based devices, blk_zone_write_plug_finish_request()
  1463. * is not called. So we need to schedule execution of the next
  1464. * plugged BIO here.
  1465. */
  1466. if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
  1467. disk_zone_wplug_unplug_bio(disk, zwplug);
  1468. /* Drop the reference we took when entering this function. */
  1469. disk_put_zone_wplug(zwplug);
  1470. }
  1471. void blk_zone_write_plug_finish_request(struct request *req)
  1472. {
  1473. struct gendisk *disk = req->q->disk;
  1474. struct blk_zone_wplug *zwplug;
  1475. zwplug = disk_get_zone_wplug(disk, req->__sector);
  1476. if (WARN_ON_ONCE(!zwplug))
  1477. return;
  1478. req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING;
  1479. /*
  1480. * Drop the reference we took when the request was initialized in
  1481. * blk_zone_write_plug_init_request().
  1482. */
  1483. disk_put_zone_wplug(zwplug);
  1484. disk_zone_wplug_unplug_bio(disk, zwplug);
  1485. /* Drop the reference we took when entering this function. */
  1486. disk_put_zone_wplug(zwplug);
  1487. }
  1488. static void blk_zone_wplug_bio_work(struct work_struct *work)
  1489. {
  1490. struct blk_zone_wplug *zwplug =
  1491. container_of(work, struct blk_zone_wplug, bio_work);
  1492. struct block_device *bdev;
  1493. unsigned long flags;
  1494. struct bio *bio;
  1495. bool prepared;
  1496. /*
  1497. * Submit the next plugged BIO. If we do not have any, clear
  1498. * the plugged flag.
  1499. */
  1500. again:
  1501. spin_lock_irqsave(&zwplug->lock, flags);
  1502. bio = bio_list_pop(&zwplug->bio_list);
  1503. if (!bio) {
  1504. zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
  1505. spin_unlock_irqrestore(&zwplug->lock, flags);
  1506. goto put_zwplug;
  1507. }
  1508. trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
  1509. bio->bi_iter.bi_sector, bio_sectors(bio));
  1510. prepared = blk_zone_wplug_prepare_bio(zwplug, bio);
  1511. spin_unlock_irqrestore(&zwplug->lock, flags);
  1512. if (!prepared) {
  1513. blk_zone_wplug_bio_io_error(zwplug, bio);
  1514. goto again;
  1515. }
  1516. bdev = bio->bi_bdev;
  1517. /*
  1518. * blk-mq devices will reuse the extra reference on the request queue
  1519. * usage counter we took when the BIO was plugged, but the submission
  1520. * path for BIO-based devices will not do that. So drop this extra
  1521. * reference here.
  1522. */
  1523. if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
  1524. bdev->bd_disk->fops->submit_bio(bio);
  1525. blk_queue_exit(bdev->bd_disk->queue);
  1526. } else {
  1527. blk_mq_submit_bio(bio);
  1528. }
  1529. put_zwplug:
  1530. /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */
  1531. disk_put_zone_wplug(zwplug);
  1532. }
  1533. void disk_init_zone_resources(struct gendisk *disk)
  1534. {
  1535. spin_lock_init(&disk->zone_wplugs_lock);
  1536. }
  1537. /*
  1538. * For the size of a disk zone write plug hash table, use the size of the
  1539. * zone write plug mempool, which is the maximum of the disk open zones and
  1540. * active zones limits. But do not exceed 4KB (512 hlist head entries), that is,
  1541. * 9 bits. For a disk that has no limits, mempool size defaults to 128.
  1542. */
  1543. #define BLK_ZONE_WPLUG_MAX_HASH_BITS 9
  1544. #define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE 128
  1545. static int disk_alloc_zone_resources(struct gendisk *disk,
  1546. unsigned int pool_size)
  1547. {
  1548. unsigned int i;
  1549. atomic_set(&disk->nr_zone_wplugs, 0);
  1550. disk->zone_wplugs_hash_bits =
  1551. min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);
  1552. disk->zone_wplugs_hash =
  1553. kzalloc_objs(struct hlist_head,
  1554. disk_zone_wplugs_hash_size(disk));
  1555. if (!disk->zone_wplugs_hash)
  1556. return -ENOMEM;
  1557. for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
  1558. INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]);
  1559. disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size,
  1560. sizeof(struct blk_zone_wplug));
  1561. if (!disk->zone_wplugs_pool)
  1562. goto free_hash;
  1563. disk->zone_wplugs_wq =
  1564. alloc_workqueue("%s_zwplugs", WQ_MEM_RECLAIM | WQ_HIGHPRI,
  1565. pool_size, disk->disk_name);
  1566. if (!disk->zone_wplugs_wq)
  1567. goto destroy_pool;
  1568. return 0;
  1569. destroy_pool:
  1570. mempool_destroy(disk->zone_wplugs_pool);
  1571. disk->zone_wplugs_pool = NULL;
  1572. free_hash:
  1573. kfree(disk->zone_wplugs_hash);
  1574. disk->zone_wplugs_hash = NULL;
  1575. disk->zone_wplugs_hash_bits = 0;
  1576. return -ENOMEM;
  1577. }
  1578. static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
  1579. {
  1580. struct blk_zone_wplug *zwplug;
  1581. unsigned int i;
  1582. if (!disk->zone_wplugs_hash)
  1583. return;
  1584. /* Free all the zone write plugs we have. */
  1585. for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
  1586. while (!hlist_empty(&disk->zone_wplugs_hash[i])) {
  1587. zwplug = hlist_entry(disk->zone_wplugs_hash[i].first,
  1588. struct blk_zone_wplug, node);
  1589. refcount_inc(&zwplug->ref);
  1590. disk_remove_zone_wplug(disk, zwplug);
  1591. disk_put_zone_wplug(zwplug);
  1592. }
  1593. }
  1594. WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
  1595. kfree(disk->zone_wplugs_hash);
  1596. disk->zone_wplugs_hash = NULL;
  1597. disk->zone_wplugs_hash_bits = 0;
  1598. /*
  1599. * Wait for the zone write plugs to be RCU-freed before destroying the
  1600. * mempool.
  1601. */
  1602. rcu_barrier();
  1603. mempool_destroy(disk->zone_wplugs_pool);
  1604. disk->zone_wplugs_pool = NULL;
  1605. }
  1606. static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
  1607. {
  1608. unsigned long flags;
  1609. spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
  1610. zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
  1611. lockdep_is_held(&disk->zone_wplugs_lock));
  1612. spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
  1613. kfree_rcu_mightsleep(zones_cond);
  1614. }
  1615. void disk_free_zone_resources(struct gendisk *disk)
  1616. {
  1617. if (disk->zone_wplugs_wq) {
  1618. destroy_workqueue(disk->zone_wplugs_wq);
  1619. disk->zone_wplugs_wq = NULL;
  1620. }
  1621. disk_destroy_zone_wplugs_hash_table(disk);
  1622. disk_set_zones_cond_array(disk, NULL);
  1623. disk->zone_capacity = 0;
  1624. disk->last_zone_capacity = 0;
  1625. disk->nr_zones = 0;
  1626. }
  1627. struct blk_revalidate_zone_args {
  1628. struct gendisk *disk;
  1629. u8 *zones_cond;
  1630. unsigned int nr_zones;
  1631. unsigned int nr_conv_zones;
  1632. unsigned int zone_capacity;
  1633. unsigned int last_zone_capacity;
  1634. sector_t sector;
  1635. };
  1636. static int disk_revalidate_zone_resources(struct gendisk *disk,
  1637. struct blk_revalidate_zone_args *args)
  1638. {
  1639. struct queue_limits *lim = &disk->queue->limits;
  1640. unsigned int pool_size;
  1641. args->disk = disk;
  1642. args->nr_zones =
  1643. DIV_ROUND_UP_ULL(get_capacity(disk), lim->chunk_sectors);
  1644. /* Cached zone conditions: 1 byte per zone */
  1645. args->zones_cond = kzalloc(args->nr_zones, GFP_NOIO);
  1646. if (!args->zones_cond)
  1647. return -ENOMEM;
  1648. if (!disk_need_zone_resources(disk))
  1649. return 0;
  1650. /*
  1651. * If the device has no limit on the maximum number of open and active
  1652. * zones, use BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE.
  1653. */
  1654. pool_size = max(lim->max_open_zones, lim->max_active_zones);
  1655. if (!pool_size)
  1656. pool_size =
  1657. min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, args->nr_zones);
  1658. if (!disk->zone_wplugs_hash)
  1659. return disk_alloc_zone_resources(disk, pool_size);
  1660. return 0;
  1661. }
  1662. /*
  1663. * Update the disk zone resources information and device queue limits.
  1664. * The disk queue is frozen when this is executed.
  1665. */
  1666. static int disk_update_zone_resources(struct gendisk *disk,
  1667. struct blk_revalidate_zone_args *args)
  1668. {
  1669. struct request_queue *q = disk->queue;
  1670. unsigned int nr_seq_zones;
  1671. unsigned int pool_size, memflags;
  1672. struct queue_limits lim;
  1673. int ret = 0;
  1674. lim = queue_limits_start_update(q);
  1675. memflags = blk_mq_freeze_queue(q);
  1676. disk->nr_zones = args->nr_zones;
  1677. if (args->nr_conv_zones >= disk->nr_zones) {
  1678. queue_limits_cancel_update(q);
  1679. pr_warn("%s: Invalid number of conventional zones %u / %u\n",
  1680. disk->disk_name, args->nr_conv_zones, disk->nr_zones);
  1681. ret = -ENODEV;
  1682. goto unfreeze;
  1683. }
  1684. disk->zone_capacity = args->zone_capacity;
  1685. disk->last_zone_capacity = args->last_zone_capacity;
  1686. disk_set_zones_cond_array(disk, args->zones_cond);
  1687. /*
  1688. * Some devices can advertise zone resource limits that are larger than
  1689. * the number of sequential zones of the zoned block device, e.g. a
  1690. * small ZNS namespace. For such case, assume that the zoned device has
  1691. * no zone resource limits.
  1692. */
  1693. nr_seq_zones = disk->nr_zones - args->nr_conv_zones;
  1694. if (lim.max_open_zones >= nr_seq_zones)
  1695. lim.max_open_zones = 0;
  1696. if (lim.max_active_zones >= nr_seq_zones)
  1697. lim.max_active_zones = 0;
  1698. if (!disk->zone_wplugs_pool)
  1699. goto commit;
  1700. /*
  1701. * If the device has no limit on the maximum number of open and active
  1702. * zones, set its max open zone limit to the mempool size to indicate
  1703. * to the user that there is a potential performance impact due to
  1704. * dynamic zone write plug allocation when simultaneously writing to
  1705. * more zones than the size of the mempool.
  1706. */
  1707. pool_size = max(lim.max_open_zones, lim.max_active_zones);
  1708. if (!pool_size)
  1709. pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);
  1710. mempool_resize(disk->zone_wplugs_pool, pool_size);
  1711. if (!lim.max_open_zones && !lim.max_active_zones) {
  1712. if (pool_size < nr_seq_zones)
  1713. lim.max_open_zones = pool_size;
  1714. else
  1715. lim.max_open_zones = 0;
  1716. }
  1717. commit:
  1718. ret = queue_limits_commit_update(q, &lim);
  1719. unfreeze:
  1720. if (ret)
  1721. disk_free_zone_resources(disk);
  1722. blk_mq_unfreeze_queue(q, memflags);
  1723. return ret;
  1724. }
  1725. static int blk_revalidate_zone_cond(struct blk_zone *zone, unsigned int idx,
  1726. struct blk_revalidate_zone_args *args)
  1727. {
  1728. enum blk_zone_cond cond = zone->cond;
  1729. /* Check that the zone condition is consistent with the zone type. */
  1730. switch (cond) {
  1731. case BLK_ZONE_COND_NOT_WP:
  1732. if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
  1733. goto invalid_condition;
  1734. break;
  1735. case BLK_ZONE_COND_IMP_OPEN:
  1736. case BLK_ZONE_COND_EXP_OPEN:
  1737. case BLK_ZONE_COND_CLOSED:
  1738. case BLK_ZONE_COND_EMPTY:
  1739. case BLK_ZONE_COND_FULL:
  1740. case BLK_ZONE_COND_OFFLINE:
  1741. case BLK_ZONE_COND_READONLY:
  1742. if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
  1743. goto invalid_condition;
  1744. break;
  1745. default:
  1746. pr_warn("%s: Invalid zone condition 0x%X\n",
  1747. args->disk->disk_name, cond);
  1748. return -ENODEV;
  1749. }
  1750. blk_zone_set_cond(args->zones_cond, idx, cond);
  1751. return 0;
  1752. invalid_condition:
  1753. pr_warn("%s: Invalid zone condition 0x%x for type 0x%x\n",
  1754. args->disk->disk_name, cond, zone->type);
  1755. return -ENODEV;
  1756. }
  1757. static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
  1758. struct blk_revalidate_zone_args *args)
  1759. {
  1760. struct gendisk *disk = args->disk;
  1761. if (zone->capacity != zone->len) {
  1762. pr_warn("%s: Invalid conventional zone capacity\n",
  1763. disk->disk_name);
  1764. return -ENODEV;
  1765. }
  1766. if (disk_zone_is_last(disk, zone))
  1767. args->last_zone_capacity = zone->capacity;
  1768. args->nr_conv_zones++;
  1769. return 0;
  1770. }
  1771. static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
  1772. struct blk_revalidate_zone_args *args)
  1773. {
  1774. struct gendisk *disk = args->disk;
  1775. struct blk_zone_wplug *zwplug;
  1776. unsigned int wp_offset;
  1777. unsigned long flags;
  1778. /*
  1779. * Remember the capacity of the first sequential zone and check
  1780. * if it is constant for all zones, ignoring the last zone as it can be
  1781. * smaller.
  1782. */
  1783. if (!args->zone_capacity)
  1784. args->zone_capacity = zone->capacity;
  1785. if (disk_zone_is_last(disk, zone)) {
  1786. args->last_zone_capacity = zone->capacity;
  1787. } else if (zone->capacity != args->zone_capacity) {
  1788. pr_warn("%s: Invalid variable zone capacity\n",
  1789. disk->disk_name);
  1790. return -ENODEV;
  1791. }
  1792. /*
  1793. * If the device needs zone append emulation, we need to track the
  1794. * write pointer of all zones that are not empty nor full. So make sure
  1795. * we have a zone write plug for such zone if the device has a zone
  1796. * write plug hash table.
  1797. */
  1798. if (!disk->zone_wplugs_hash)
  1799. return 0;
  1800. wp_offset = disk_zone_wplug_sync_wp_offset(disk, zone);
  1801. if (!wp_offset || wp_offset >= zone->capacity)
  1802. return 0;
  1803. zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags);
  1804. if (!zwplug)
  1805. return -ENOMEM;
  1806. spin_unlock_irqrestore(&zwplug->lock, flags);
  1807. disk_put_zone_wplug(zwplug);
  1808. return 0;
  1809. }
  1810. /*
  1811. * Helper function to check the validity of zones of a zoned block device.
  1812. */
  1813. static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
  1814. void *data)
  1815. {
  1816. struct blk_revalidate_zone_args *args = data;
  1817. struct gendisk *disk = args->disk;
  1818. sector_t zone_sectors = disk->queue->limits.chunk_sectors;
  1819. int ret;
  1820. /* Check for bad zones and holes in the zone report */
  1821. if (zone->start != args->sector) {
  1822. pr_warn("%s: Zone gap at sectors %llu..%llu\n",
  1823. disk->disk_name, args->sector, zone->start);
  1824. return -ENODEV;
  1825. }
  1826. if (zone->start >= get_capacity(disk) || !zone->len) {
  1827. pr_warn("%s: Invalid zone start %llu, length %llu\n",
  1828. disk->disk_name, zone->start, zone->len);
  1829. return -ENODEV;
  1830. }
  1831. /*
  1832. * All zones must have the same size, with the exception on an eventual
  1833. * smaller last zone.
  1834. */
  1835. if (!disk_zone_is_last(disk, zone)) {
  1836. if (zone->len != zone_sectors) {
  1837. pr_warn("%s: Invalid zoned device with non constant zone size\n",
  1838. disk->disk_name);
  1839. return -ENODEV;
  1840. }
  1841. } else if (zone->len > zone_sectors) {
  1842. pr_warn("%s: Invalid zoned device with larger last zone size\n",
  1843. disk->disk_name);
  1844. return -ENODEV;
  1845. }
  1846. if (!zone->capacity || zone->capacity > zone->len) {
  1847. pr_warn("%s: Invalid zone capacity\n",
  1848. disk->disk_name);
  1849. return -ENODEV;
  1850. }
  1851. /* Check zone condition */
  1852. ret = blk_revalidate_zone_cond(zone, idx, args);
  1853. if (ret)
  1854. return ret;
  1855. /* Check zone type */
  1856. switch (zone->type) {
  1857. case BLK_ZONE_TYPE_CONVENTIONAL:
  1858. ret = blk_revalidate_conv_zone(zone, idx, args);
  1859. break;
  1860. case BLK_ZONE_TYPE_SEQWRITE_REQ:
  1861. ret = blk_revalidate_seq_zone(zone, idx, args);
  1862. break;
  1863. case BLK_ZONE_TYPE_SEQWRITE_PREF:
  1864. default:
  1865. pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
  1866. disk->disk_name, (int)zone->type, zone->start);
  1867. ret = -ENODEV;
  1868. }
  1869. if (!ret)
  1870. args->sector += zone->len;
  1871. return ret;
  1872. }
  1873. /**
  1874. * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs
  1875. * @disk: Target disk
  1876. *
  1877. * Helper function for low-level device drivers to check, (re) allocate and
  1878. * initialize resources used for managing zoned disks. This function should
  1879. * normally be called by blk-mq based drivers when a zoned gendisk is probed
  1880. * and when the zone configuration of the gendisk changes (e.g. after a format).
  1881. * Before calling this function, the device driver must already have set the
  1882. * device zone size (chunk_sector limit) and the max zone append limit.
  1883. * BIO based drivers can also use this function as long as the device queue
  1884. * can be safely frozen.
  1885. */
  1886. int blk_revalidate_disk_zones(struct gendisk *disk)
  1887. {
  1888. struct request_queue *q = disk->queue;
  1889. sector_t zone_sectors = q->limits.chunk_sectors;
  1890. sector_t capacity = get_capacity(disk);
  1891. struct blk_revalidate_zone_args args = { };
  1892. unsigned int memflags, noio_flag;
  1893. struct blk_report_zones_args rep_args = {
  1894. .cb = blk_revalidate_zone_cb,
  1895. .data = &args,
  1896. };
  1897. int ret = -ENOMEM;
  1898. if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
  1899. return -EIO;
  1900. if (!capacity)
  1901. return -ENODEV;
  1902. /*
  1903. * Checks that the device driver indicated a valid zone size and that
  1904. * the max zone append limit is set.
  1905. */
  1906. if (!zone_sectors || !is_power_of_2(zone_sectors)) {
  1907. pr_warn("%s: Invalid non power of two zone size (%llu)\n",
  1908. disk->disk_name, zone_sectors);
  1909. return -ENODEV;
  1910. }
  1911. /*
  1912. * Ensure that all memory allocations in this context are done as if
  1913. * GFP_NOIO was specified.
  1914. */
  1915. noio_flag = memalloc_noio_save();
  1916. ret = disk_revalidate_zone_resources(disk, &args);
  1917. if (ret) {
  1918. memalloc_noio_restore(noio_flag);
  1919. return ret;
  1920. }
  1921. ret = disk->fops->report_zones(disk, 0, UINT_MAX, &rep_args);
  1922. if (!ret) {
  1923. pr_warn("%s: No zones reported\n", disk->disk_name);
  1924. ret = -ENODEV;
  1925. }
  1926. memalloc_noio_restore(noio_flag);
  1927. /*
  1928. * If zones where reported, make sure that the entire disk capacity
  1929. * has been checked.
  1930. */
  1931. if (ret > 0 && args.sector != capacity) {
  1932. pr_warn("%s: Missing zones from sector %llu\n",
  1933. disk->disk_name, args.sector);
  1934. ret = -ENODEV;
  1935. }
  1936. if (ret > 0)
  1937. return disk_update_zone_resources(disk, &args);
  1938. pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
  1939. memflags = blk_mq_freeze_queue(q);
  1940. disk_free_zone_resources(disk);
  1941. blk_mq_unfreeze_queue(q, memflags);
  1942. return ret;
  1943. }
  1944. EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
  1945. /**
  1946. * blk_zone_issue_zeroout - zero-fill a block range in a zone
  1947. * @bdev: blockdev to write
  1948. * @sector: start sector
  1949. * @nr_sects: number of sectors to write
  1950. * @gfp_mask: memory allocation flags (for bio_alloc)
  1951. *
  1952. * Description:
  1953. * Zero-fill a block range in a zone (@sector must be equal to the zone write
  1954. * pointer), handling potential errors due to the (initially unknown) lack of
  1955. * hardware offload (See blkdev_issue_zeroout()).
  1956. */
  1957. int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
  1958. sector_t nr_sects, gfp_t gfp_mask)
  1959. {
  1960. struct gendisk *disk = bdev->bd_disk;
  1961. int ret;
  1962. if (WARN_ON_ONCE(!bdev_is_zoned(bdev)))
  1963. return -EIO;
  1964. ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
  1965. BLKDEV_ZERO_NOFALLBACK);
  1966. if (ret != -EOPNOTSUPP)
  1967. return ret;
  1968. /*
  1969. * The failed call to blkdev_issue_zeroout() advanced the zone write
  1970. * pointer. Undo this using a report zone to update the zone write
  1971. * pointer to the correct current value.
  1972. */
  1973. ret = disk->fops->report_zones(disk, sector, 1, NULL);
  1974. if (ret != 1)
  1975. return ret < 0 ? ret : -EIO;
  1976. /*
  1977. * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a
  1978. * regular write with zero-pages.
  1979. */
  1980. return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0);
  1981. }
  1982. EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout);
  1983. #ifdef CONFIG_BLK_DEBUG_FS
  1984. static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
  1985. struct seq_file *m)
  1986. {
  1987. unsigned int zwp_wp_offset, zwp_flags;
  1988. unsigned int zwp_zone_no, zwp_ref;
  1989. unsigned int zwp_bio_list_size;
  1990. enum blk_zone_cond zwp_cond;
  1991. unsigned long flags;
  1992. spin_lock_irqsave(&zwplug->lock, flags);
  1993. zwp_zone_no = zwplug->zone_no;
  1994. zwp_flags = zwplug->flags;
  1995. zwp_ref = refcount_read(&zwplug->ref);
  1996. zwp_cond = zwplug->cond;
  1997. zwp_wp_offset = zwplug->wp_offset;
  1998. zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
  1999. spin_unlock_irqrestore(&zwplug->lock, flags);
  2000. seq_printf(m,
  2001. "Zone no: %u, flags: 0x%x, ref: %u, cond: %s, wp ofst: %u, pending BIO: %u\n",
  2002. zwp_zone_no, zwp_flags, zwp_ref, blk_zone_cond_str(zwp_cond),
  2003. zwp_wp_offset, zwp_bio_list_size);
  2004. }
  2005. int queue_zone_wplugs_show(void *data, struct seq_file *m)
  2006. {
  2007. struct request_queue *q = data;
  2008. struct gendisk *disk = q->disk;
  2009. struct blk_zone_wplug *zwplug;
  2010. unsigned int i;
  2011. if (!disk->zone_wplugs_hash)
  2012. return 0;
  2013. rcu_read_lock();
  2014. for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
  2015. hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i],
  2016. node)
  2017. queue_zone_wplug_show(zwplug, m);
  2018. rcu_read_unlock();
  2019. return 0;
  2020. }
  2021. #endif