super.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Simple file system for zoned block devices exposing zones as files.
  4. *
  5. * Copyright (C) 2019 Western Digital Corporation or its affiliates.
  6. */
  7. #include <linux/module.h>
  8. #include <linux/pagemap.h>
  9. #include <linux/magic.h>
  10. #include <linux/iomap.h>
  11. #include <linux/init.h>
  12. #include <linux/slab.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/statfs.h>
  15. #include <linux/writeback.h>
  16. #include <linux/quotaops.h>
  17. #include <linux/seq_file.h>
  18. #include <linux/uio.h>
  19. #include <linux/mman.h>
  20. #include <linux/sched/mm.h>
  21. #include <linux/crc32.h>
  22. #include <linux/task_io_accounting_ops.h>
  23. #include <linux/fs_parser.h>
  24. #include <linux/fs_context.h>
  25. #include "zonefs.h"
  26. #define CREATE_TRACE_POINTS
  27. #include "trace.h"
  28. /*
  29. * Get the name of a zone group directory.
  30. */
  31. static const char *zonefs_zgroup_name(enum zonefs_ztype ztype)
  32. {
  33. switch (ztype) {
  34. case ZONEFS_ZTYPE_CNV:
  35. return "cnv";
  36. case ZONEFS_ZTYPE_SEQ:
  37. return "seq";
  38. default:
  39. WARN_ON_ONCE(1);
  40. return "???";
  41. }
  42. }
  43. /*
  44. * Manage the active zone count.
  45. */
  46. static void zonefs_account_active(struct super_block *sb,
  47. struct zonefs_zone *z)
  48. {
  49. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  50. if (zonefs_zone_is_cnv(z))
  51. return;
  52. /*
  53. * For zones that transitioned to the offline or readonly condition,
  54. * we only need to clear the active state.
  55. */
  56. if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
  57. goto out;
  58. /*
  59. * If the zone is active, that is, if it is explicitly open or
  60. * partially written, check if it was already accounted as active.
  61. */
  62. if ((z->z_flags & ZONEFS_ZONE_OPEN) ||
  63. (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) {
  64. if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) {
  65. z->z_flags |= ZONEFS_ZONE_ACTIVE;
  66. atomic_inc(&sbi->s_active_seq_files);
  67. }
  68. return;
  69. }
  70. out:
  71. /* The zone is not active. If it was, update the active count */
  72. if (z->z_flags & ZONEFS_ZONE_ACTIVE) {
  73. z->z_flags &= ~ZONEFS_ZONE_ACTIVE;
  74. atomic_dec(&sbi->s_active_seq_files);
  75. }
  76. }
  77. /*
  78. * Manage the active zone count. Called with zi->i_truncate_mutex held.
  79. */
  80. void zonefs_inode_account_active(struct inode *inode)
  81. {
  82. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  83. return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode));
  84. }
  85. /*
  86. * Execute a zone management operation.
  87. */
  88. static int zonefs_zone_mgmt(struct super_block *sb,
  89. struct zonefs_zone *z, enum req_op op)
  90. {
  91. int ret;
  92. /*
  93. * With ZNS drives, closing an explicitly open zone that has not been
  94. * written will change the zone state to "closed", that is, the zone
  95. * will remain active. Since this can then cause failure of explicit
  96. * open operation on other zones if the drive active zone resources
  97. * are exceeded, make sure that the zone does not remain active by
  98. * resetting it.
  99. */
  100. if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset)
  101. op = REQ_OP_ZONE_RESET;
  102. trace_zonefs_zone_mgmt(sb, z, op);
  103. ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
  104. z->z_size >> SECTOR_SHIFT);
  105. if (ret) {
  106. zonefs_err(sb,
  107. "Zone management operation %s at %llu failed %d\n",
  108. blk_op_str(op), z->z_sector, ret);
  109. return ret;
  110. }
  111. return 0;
  112. }
  113. int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op)
  114. {
  115. lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
  116. return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op);
  117. }
  118. void zonefs_i_size_write(struct inode *inode, loff_t isize)
  119. {
  120. struct zonefs_zone *z = zonefs_inode_zone(inode);
  121. i_size_write(inode, isize);
  122. /*
  123. * A full zone is no longer open/active and does not need
  124. * explicit closing.
  125. */
  126. if (isize >= z->z_capacity) {
  127. struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
  128. if (z->z_flags & ZONEFS_ZONE_ACTIVE)
  129. atomic_dec(&sbi->s_active_seq_files);
  130. z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
  131. }
  132. }
  133. void zonefs_update_stats(struct inode *inode, loff_t new_isize)
  134. {
  135. struct super_block *sb = inode->i_sb;
  136. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  137. loff_t old_isize = i_size_read(inode);
  138. loff_t nr_blocks;
  139. if (new_isize == old_isize)
  140. return;
  141. spin_lock(&sbi->s_lock);
  142. /*
  143. * This may be called for an update after an IO error.
  144. * So beware of the values seen.
  145. */
  146. if (new_isize < old_isize) {
  147. nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits;
  148. if (sbi->s_used_blocks > nr_blocks)
  149. sbi->s_used_blocks -= nr_blocks;
  150. else
  151. sbi->s_used_blocks = 0;
  152. } else {
  153. sbi->s_used_blocks +=
  154. (new_isize - old_isize) >> sb->s_blocksize_bits;
  155. if (sbi->s_used_blocks > sbi->s_blocks)
  156. sbi->s_used_blocks = sbi->s_blocks;
  157. }
  158. spin_unlock(&sbi->s_lock);
  159. }
  160. /*
  161. * Check a zone condition. Return the amount of written (and still readable)
  162. * data in the zone.
  163. */
  164. static loff_t zonefs_check_zone_condition(struct super_block *sb,
  165. struct zonefs_zone *z,
  166. struct blk_zone *zone)
  167. {
  168. switch (zone->cond) {
  169. case BLK_ZONE_COND_OFFLINE:
  170. zonefs_warn(sb, "Zone %llu: offline zone\n",
  171. z->z_sector);
  172. z->z_flags |= ZONEFS_ZONE_OFFLINE;
  173. return 0;
  174. case BLK_ZONE_COND_READONLY:
  175. /*
  176. * The write pointer of read-only zones is invalid, so we cannot
  177. * determine the zone wpoffset (inode size). We thus keep the
  178. * zone wpoffset as is, which leads to an empty file
  179. * (wpoffset == 0) on mount. For a runtime error, this keeps
  180. * the inode size as it was when last updated so that the user
  181. * can recover data.
  182. */
  183. zonefs_warn(sb, "Zone %llu: read-only zone\n",
  184. z->z_sector);
  185. z->z_flags |= ZONEFS_ZONE_READONLY;
  186. if (zonefs_zone_is_cnv(z))
  187. return z->z_capacity;
  188. return z->z_wpoffset;
  189. case BLK_ZONE_COND_FULL:
  190. /* The write pointer of full zones is invalid. */
  191. return z->z_capacity;
  192. default:
  193. if (zonefs_zone_is_cnv(z))
  194. return z->z_capacity;
  195. return (zone->wp - zone->start) << SECTOR_SHIFT;
  196. }
  197. }
  198. /*
  199. * Check a zone condition and adjust its inode access permissions for
  200. * offline and readonly zones.
  201. */
  202. static void zonefs_inode_update_mode(struct inode *inode)
  203. {
  204. struct zonefs_zone *z = zonefs_inode_zone(inode);
  205. if (z->z_flags & ZONEFS_ZONE_OFFLINE) {
  206. /* Offline zones cannot be read nor written */
  207. inode->i_flags |= S_IMMUTABLE;
  208. inode->i_mode &= ~0777;
  209. } else if (z->z_flags & ZONEFS_ZONE_READONLY) {
  210. /* Readonly zones cannot be written */
  211. inode->i_flags |= S_IMMUTABLE;
  212. if (z->z_flags & ZONEFS_ZONE_INIT_MODE)
  213. inode->i_mode &= ~0777;
  214. else
  215. inode->i_mode &= ~0222;
  216. }
  217. z->z_flags &= ~ZONEFS_ZONE_INIT_MODE;
  218. z->z_mode = inode->i_mode;
  219. }
  220. static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
  221. void *data)
  222. {
  223. struct blk_zone *z = data;
  224. *z = *zone;
  225. return 0;
  226. }
  227. static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
  228. bool write)
  229. {
  230. struct zonefs_zone *z = zonefs_inode_zone(inode);
  231. struct super_block *sb = inode->i_sb;
  232. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  233. loff_t isize, data_size;
  234. /*
  235. * Check the zone condition: if the zone is not "bad" (offline or
  236. * read-only), read errors are simply signaled to the IO issuer as long
  237. * as there is no inconsistency between the inode size and the amount of
  238. * data written in the zone (data_size).
  239. */
  240. data_size = zonefs_check_zone_condition(sb, z, zone);
  241. isize = i_size_read(inode);
  242. if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
  243. !write && isize == data_size)
  244. return;
  245. /*
  246. * At this point, we detected either a bad zone or an inconsistency
  247. * between the inode size and the amount of data written in the zone.
  248. * For the latter case, the cause may be a write IO error or an external
  249. * action on the device. Two error patterns exist:
  250. * 1) The inode size is lower than the amount of data in the zone:
  251. * a write operation partially failed and data was written at the end
  252. * of the file. This can happen in the case of a large direct IO
  253. * needing several BIOs and/or write requests to be processed.
  254. * 2) The inode size is larger than the amount of data in the zone:
  255. * this can happen with a deferred write error with the use of the
  256. * device side write cache after getting successful write IO
  257. * completions. Other possibilities are (a) an external corruption,
  258. * e.g. an application reset the zone directly, or (b) the device
  259. * has a serious problem (e.g. firmware bug).
  260. *
  261. * In all cases, warn about inode size inconsistency and handle the
  262. * IO error according to the zone condition and to the mount options.
  263. */
  264. if (isize != data_size)
  265. zonefs_warn(sb,
  266. "inode %lu: invalid size %lld (should be %lld)\n",
  267. inode->i_ino, isize, data_size);
  268. /*
  269. * First handle bad zones signaled by hardware. The mount options
  270. * errors=zone-ro and errors=zone-offline result in changing the
  271. * zone condition to read-only and offline respectively, as if the
  272. * condition was signaled by the hardware.
  273. */
  274. if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
  275. (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
  276. zonefs_warn(sb, "inode %lu: read/write access disabled\n",
  277. inode->i_ino);
  278. if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
  279. z->z_flags |= ZONEFS_ZONE_OFFLINE;
  280. zonefs_inode_update_mode(inode);
  281. data_size = 0;
  282. } else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
  283. (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
  284. zonefs_warn(sb, "inode %lu: write access disabled\n",
  285. inode->i_ino);
  286. if (!(z->z_flags & ZONEFS_ZONE_READONLY))
  287. z->z_flags |= ZONEFS_ZONE_READONLY;
  288. zonefs_inode_update_mode(inode);
  289. data_size = isize;
  290. } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
  291. data_size > isize) {
  292. /* Do not expose garbage data */
  293. data_size = isize;
  294. }
  295. /*
  296. * If the filesystem is mounted with the explicit-open mount option, we
  297. * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
  298. * the read-only or offline condition, to avoid attempting an explicit
  299. * close of the zone when the inode file is closed.
  300. */
  301. if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
  302. (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
  303. z->z_flags &= ~ZONEFS_ZONE_OPEN;
  304. /*
  305. * If error=remount-ro was specified, any error result in remounting
  306. * the volume as read-only.
  307. */
  308. if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
  309. zonefs_warn(sb, "remounting filesystem read-only\n");
  310. sb->s_flags |= SB_RDONLY;
  311. }
  312. /*
  313. * Update block usage stats and the inode size to prevent access to
  314. * invalid data.
  315. */
  316. zonefs_update_stats(inode, data_size);
  317. zonefs_i_size_write(inode, data_size);
  318. z->z_wpoffset = data_size;
  319. zonefs_inode_account_active(inode);
  320. }
  321. /*
  322. * When an file IO error occurs, check the file zone to see if there is a change
  323. * in the zone condition (e.g. offline or read-only). For a failed write to a
  324. * sequential zone, the zone write pointer position must also be checked to
  325. * eventually correct the file size and zonefs inode write pointer offset
  326. * (which can be out of sync with the drive due to partial write failures).
  327. */
  328. void __zonefs_io_error(struct inode *inode, bool write)
  329. {
  330. struct zonefs_zone *z = zonefs_inode_zone(inode);
  331. struct super_block *sb = inode->i_sb;
  332. unsigned int noio_flag;
  333. struct blk_zone zone;
  334. int ret;
  335. /*
  336. * Conventional zone have no write pointer and cannot become read-only
  337. * or offline. So simply fake a report for a single or aggregated zone
  338. * and let zonefs_handle_io_error() correct the zone inode information
  339. * according to the mount options.
  340. */
  341. if (!zonefs_zone_is_seq(z)) {
  342. zone.start = z->z_sector;
  343. zone.len = z->z_size >> SECTOR_SHIFT;
  344. zone.wp = zone.start + zone.len;
  345. zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
  346. zone.cond = BLK_ZONE_COND_NOT_WP;
  347. zone.capacity = zone.len;
  348. goto handle_io_error;
  349. }
  350. /*
  351. * Memory allocations in blkdev_report_zones() can trigger a memory
  352. * reclaim which may in turn cause a recursion into zonefs as well as
  353. * struct request allocations for the same device. The former case may
  354. * end up in a deadlock on the inode truncate mutex, while the latter
  355. * may prevent IO forward progress. Executing the report zones under
  356. * the GFP_NOIO context avoids both problems.
  357. */
  358. noio_flag = memalloc_noio_save();
  359. ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
  360. zonefs_io_error_cb, &zone);
  361. memalloc_noio_restore(noio_flag);
  362. if (ret != 1) {
  363. zonefs_err(sb, "Get inode %lu zone information failed %d\n",
  364. inode->i_ino, ret);
  365. zonefs_warn(sb, "remounting filesystem read-only\n");
  366. sb->s_flags |= SB_RDONLY;
  367. return;
  368. }
  369. handle_io_error:
  370. zonefs_handle_io_error(inode, &zone, write);
  371. }
  372. static struct kmem_cache *zonefs_inode_cachep;
  373. static struct inode *zonefs_alloc_inode(struct super_block *sb)
  374. {
  375. struct zonefs_inode_info *zi;
  376. zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
  377. if (!zi)
  378. return NULL;
  379. inode_init_once(&zi->i_vnode);
  380. mutex_init(&zi->i_truncate_mutex);
  381. zi->i_wr_refcnt = 0;
  382. return &zi->i_vnode;
  383. }
  384. static void zonefs_free_inode(struct inode *inode)
  385. {
  386. kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
  387. }
  388. /*
  389. * File system stat.
  390. */
  391. static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
  392. {
  393. struct super_block *sb = dentry->d_sb;
  394. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  395. enum zonefs_ztype t;
  396. buf->f_type = ZONEFS_MAGIC;
  397. buf->f_bsize = sb->s_blocksize;
  398. buf->f_namelen = ZONEFS_NAME_MAX;
  399. spin_lock(&sbi->s_lock);
  400. buf->f_blocks = sbi->s_blocks;
  401. if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
  402. buf->f_bfree = 0;
  403. else
  404. buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
  405. buf->f_bavail = buf->f_bfree;
  406. for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
  407. if (sbi->s_zgroup[t].g_nr_zones)
  408. buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
  409. }
  410. buf->f_ffree = 0;
  411. spin_unlock(&sbi->s_lock);
  412. buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
  413. return 0;
  414. }
  415. enum {
  416. Opt_errors, Opt_explicit_open,
  417. };
  418. struct zonefs_context {
  419. unsigned long s_mount_opts;
  420. };
  421. static const struct constant_table zonefs_param_errors[] = {
  422. {"remount-ro", ZONEFS_MNTOPT_ERRORS_RO},
  423. {"zone-ro", ZONEFS_MNTOPT_ERRORS_ZRO},
  424. {"zone-offline", ZONEFS_MNTOPT_ERRORS_ZOL},
  425. {"repair", ZONEFS_MNTOPT_ERRORS_REPAIR},
  426. {}
  427. };
  428. static const struct fs_parameter_spec zonefs_param_spec[] = {
  429. fsparam_enum ("errors", Opt_errors, zonefs_param_errors),
  430. fsparam_flag ("explicit-open", Opt_explicit_open),
  431. {}
  432. };
  433. static int zonefs_parse_param(struct fs_context *fc, struct fs_parameter *param)
  434. {
  435. struct zonefs_context *ctx = fc->fs_private;
  436. struct fs_parse_result result;
  437. int opt;
  438. opt = fs_parse(fc, zonefs_param_spec, param, &result);
  439. if (opt < 0)
  440. return opt;
  441. switch (opt) {
  442. case Opt_errors:
  443. ctx->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
  444. ctx->s_mount_opts |= result.uint_32;
  445. break;
  446. case Opt_explicit_open:
  447. ctx->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN;
  448. break;
  449. default:
  450. return -EINVAL;
  451. }
  452. return 0;
  453. }
  454. static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
  455. {
  456. struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
  457. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
  458. seq_puts(seq, ",errors=remount-ro");
  459. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
  460. seq_puts(seq, ",errors=zone-ro");
  461. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
  462. seq_puts(seq, ",errors=zone-offline");
  463. if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
  464. seq_puts(seq, ",errors=repair");
  465. return 0;
  466. }
  467. static int zonefs_inode_setattr(struct mnt_idmap *idmap,
  468. struct dentry *dentry, struct iattr *iattr)
  469. {
  470. struct inode *inode = d_inode(dentry);
  471. int ret;
  472. if (unlikely(IS_IMMUTABLE(inode)))
  473. return -EPERM;
  474. ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
  475. if (ret)
  476. return ret;
  477. /*
  478. * Since files and directories cannot be created nor deleted, do not
  479. * allow setting any write attributes on the sub-directories grouping
  480. * files by zone type.
  481. */
  482. if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
  483. (iattr->ia_mode & 0222))
  484. return -EPERM;
  485. if (((iattr->ia_valid & ATTR_UID) &&
  486. !uid_eq(iattr->ia_uid, inode->i_uid)) ||
  487. ((iattr->ia_valid & ATTR_GID) &&
  488. !gid_eq(iattr->ia_gid, inode->i_gid))) {
  489. ret = dquot_transfer(&nop_mnt_idmap, inode, iattr);
  490. if (ret)
  491. return ret;
  492. }
  493. if (iattr->ia_valid & ATTR_SIZE) {
  494. ret = zonefs_file_truncate(inode, iattr->ia_size);
  495. if (ret)
  496. return ret;
  497. }
  498. setattr_copy(&nop_mnt_idmap, inode, iattr);
  499. if (S_ISREG(inode->i_mode)) {
  500. struct zonefs_zone *z = zonefs_inode_zone(inode);
  501. z->z_mode = inode->i_mode;
  502. z->z_uid = inode->i_uid;
  503. z->z_gid = inode->i_gid;
  504. }
  505. return 0;
  506. }
  507. static const struct inode_operations zonefs_file_inode_operations = {
  508. .setattr = zonefs_inode_setattr,
  509. };
  510. static long zonefs_fname_to_fno(const struct qstr *fname)
  511. {
  512. const char *name = fname->name;
  513. unsigned int len = fname->len;
  514. long fno = 0, shift = 1;
  515. const char *rname;
  516. char c = *name;
  517. unsigned int i;
  518. /*
  519. * File names are always a base-10 number string without any
  520. * leading 0s.
  521. */
  522. if (!isdigit(c))
  523. return -ENOENT;
  524. if (len > 1 && c == '0')
  525. return -ENOENT;
  526. if (len == 1)
  527. return c - '0';
  528. for (i = 0, rname = name + len - 1; i < len; i++, rname--) {
  529. c = *rname;
  530. if (!isdigit(c))
  531. return -ENOENT;
  532. fno += (c - '0') * shift;
  533. shift *= 10;
  534. }
  535. return fno;
  536. }
  537. static struct inode *zonefs_get_file_inode(struct inode *dir,
  538. struct dentry *dentry)
  539. {
  540. struct zonefs_zone_group *zgroup = dir->i_private;
  541. struct super_block *sb = dir->i_sb;
  542. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  543. struct zonefs_zone *z;
  544. struct inode *inode;
  545. ino_t ino;
  546. long fno;
  547. /* Get the file number from the file name */
  548. fno = zonefs_fname_to_fno(&dentry->d_name);
  549. if (fno < 0)
  550. return ERR_PTR(fno);
  551. if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones)
  552. return ERR_PTR(-ENOENT);
  553. z = &zgroup->g_zones[fno];
  554. ino = z->z_sector >> sbi->s_zone_sectors_shift;
  555. inode = iget_locked(sb, ino);
  556. if (!inode)
  557. return ERR_PTR(-ENOMEM);
  558. if (!(inode_state_read_once(inode) & I_NEW)) {
  559. WARN_ON_ONCE(inode->i_private != z);
  560. return inode;
  561. }
  562. inode->i_ino = ino;
  563. inode->i_mode = z->z_mode;
  564. inode_set_mtime_to_ts(inode,
  565. inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(dir))));
  566. inode->i_uid = z->z_uid;
  567. inode->i_gid = z->z_gid;
  568. inode->i_size = z->z_wpoffset;
  569. inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
  570. inode->i_private = z;
  571. inode->i_op = &zonefs_file_inode_operations;
  572. inode->i_fop = &zonefs_file_operations;
  573. inode->i_mapping->a_ops = &zonefs_file_aops;
  574. mapping_set_large_folios(inode->i_mapping);
  575. /* Update the inode access rights depending on the zone condition */
  576. zonefs_inode_update_mode(inode);
  577. unlock_new_inode(inode);
  578. return inode;
  579. }
  580. static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
  581. enum zonefs_ztype ztype)
  582. {
  583. struct inode *root = d_inode(sb->s_root);
  584. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  585. struct inode *inode;
  586. ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;
  587. inode = iget_locked(sb, ino);
  588. if (!inode)
  589. return ERR_PTR(-ENOMEM);
  590. if (!(inode_state_read_once(inode) & I_NEW))
  591. return inode;
  592. inode->i_ino = ino;
  593. inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
  594. inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
  595. inode_set_mtime_to_ts(inode,
  596. inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(root))));
  597. inode->i_private = &sbi->s_zgroup[ztype];
  598. set_nlink(inode, 2);
  599. inode->i_op = &zonefs_dir_inode_operations;
  600. inode->i_fop = &zonefs_dir_operations;
  601. unlock_new_inode(inode);
  602. return inode;
  603. }
  604. static struct inode *zonefs_get_dir_inode(struct inode *dir,
  605. struct dentry *dentry)
  606. {
  607. struct super_block *sb = dir->i_sb;
  608. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  609. const char *name = dentry->d_name.name;
  610. enum zonefs_ztype ztype;
  611. /*
  612. * We only need to check for the "seq" directory and
  613. * the "cnv" directory if we have conventional zones.
  614. */
  615. if (dentry->d_name.len != 3)
  616. return ERR_PTR(-ENOENT);
  617. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  618. if (sbi->s_zgroup[ztype].g_nr_zones &&
  619. memcmp(name, zonefs_zgroup_name(ztype), 3) == 0)
  620. break;
  621. }
  622. if (ztype == ZONEFS_ZTYPE_MAX)
  623. return ERR_PTR(-ENOENT);
  624. return zonefs_get_zgroup_inode(sb, ztype);
  625. }
  626. static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry,
  627. unsigned int flags)
  628. {
  629. struct inode *inode;
  630. if (dentry->d_name.len > ZONEFS_NAME_MAX)
  631. return ERR_PTR(-ENAMETOOLONG);
  632. if (dir == d_inode(dir->i_sb->s_root))
  633. inode = zonefs_get_dir_inode(dir, dentry);
  634. else
  635. inode = zonefs_get_file_inode(dir, dentry);
  636. return d_splice_alias(inode, dentry);
  637. }
  638. static int zonefs_readdir_root(struct file *file, struct dir_context *ctx)
  639. {
  640. struct inode *inode = file_inode(file);
  641. struct super_block *sb = inode->i_sb;
  642. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  643. enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV;
  644. ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1;
  645. if (ctx->pos >= inode->i_size)
  646. return 0;
  647. if (!dir_emit_dots(file, ctx))
  648. return 0;
  649. if (ctx->pos == 2) {
  650. if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones)
  651. ztype = ZONEFS_ZTYPE_SEQ;
  652. if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
  653. base_ino + ztype, DT_DIR))
  654. return 0;
  655. ctx->pos++;
  656. }
  657. if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) {
  658. ztype = ZONEFS_ZTYPE_SEQ;
  659. if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3,
  660. base_ino + ztype, DT_DIR))
  661. return 0;
  662. ctx->pos++;
  663. }
  664. return 0;
  665. }
  666. static int zonefs_readdir_zgroup(struct file *file,
  667. struct dir_context *ctx)
  668. {
  669. struct inode *inode = file_inode(file);
  670. struct zonefs_zone_group *zgroup = inode->i_private;
  671. struct super_block *sb = inode->i_sb;
  672. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  673. struct zonefs_zone *z;
  674. int fname_len;
  675. char *fname;
  676. ino_t ino;
  677. int f;
  678. /*
  679. * The size of zone group directories is equal to the number
  680. * of zone files in the group and does note include the "." and
  681. * ".." entries. Hence the "+ 2" here.
  682. */
  683. if (ctx->pos >= inode->i_size + 2)
  684. return 0;
  685. if (!dir_emit_dots(file, ctx))
  686. return 0;
  687. fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
  688. if (!fname)
  689. return -ENOMEM;
  690. for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) {
  691. z = &zgroup->g_zones[f];
  692. ino = z->z_sector >> sbi->s_zone_sectors_shift;
  693. fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f);
  694. if (!dir_emit(ctx, fname, fname_len, ino, DT_REG))
  695. break;
  696. ctx->pos++;
  697. }
  698. kfree(fname);
  699. return 0;
  700. }
  701. static int zonefs_readdir(struct file *file, struct dir_context *ctx)
  702. {
  703. struct inode *inode = file_inode(file);
  704. if (inode == d_inode(inode->i_sb->s_root))
  705. return zonefs_readdir_root(file, ctx);
  706. return zonefs_readdir_zgroup(file, ctx);
  707. }
  708. const struct inode_operations zonefs_dir_inode_operations = {
  709. .lookup = zonefs_lookup,
  710. .setattr = zonefs_inode_setattr,
  711. };
  712. const struct file_operations zonefs_dir_operations = {
  713. .llseek = generic_file_llseek,
  714. .read = generic_read_dir,
  715. .iterate_shared = zonefs_readdir,
  716. };
  717. struct zonefs_zone_data {
  718. struct super_block *sb;
  719. unsigned int nr_zones[ZONEFS_ZTYPE_MAX];
  720. sector_t cnv_zone_start;
  721. struct blk_zone *zones;
  722. };
  723. static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
  724. void *data)
  725. {
  726. struct zonefs_zone_data *zd = data;
  727. struct super_block *sb = zd->sb;
  728. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  729. /*
  730. * We do not care about the first zone: it contains the super block
  731. * and not exposed as a file.
  732. */
  733. if (!idx)
  734. return 0;
  735. /*
  736. * Count the number of zones that will be exposed as files.
  737. * For sequential zones, we always have as many files as zones.
  738. * FOr conventional zones, the number of files depends on if we have
  739. * conventional zones aggregation enabled.
  740. */
  741. switch (zone->type) {
  742. case BLK_ZONE_TYPE_CONVENTIONAL:
  743. if (sbi->s_features & ZONEFS_F_AGGRCNV) {
  744. /* One file per set of contiguous conventional zones */
  745. if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ||
  746. zone->start != zd->cnv_zone_start)
  747. sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
  748. zd->cnv_zone_start = zone->start + zone->len;
  749. } else {
  750. /* One file per zone */
  751. sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
  752. }
  753. break;
  754. case BLK_ZONE_TYPE_SEQWRITE_REQ:
  755. case BLK_ZONE_TYPE_SEQWRITE_PREF:
  756. sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++;
  757. break;
  758. default:
  759. zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
  760. zone->type);
  761. return -EIO;
  762. }
  763. memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));
  764. return 0;
  765. }
  766. static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
  767. {
  768. struct block_device *bdev = zd->sb->s_bdev;
  769. int ret;
  770. zd->zones = kvzalloc_objs(struct blk_zone, bdev_nr_zones(bdev));
  771. if (!zd->zones)
  772. return -ENOMEM;
  773. /* Get zones information from the device */
  774. ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
  775. zonefs_get_zone_info_cb, zd);
  776. if (ret < 0) {
  777. zonefs_err(zd->sb, "Zone report failed %d\n", ret);
  778. return ret;
  779. }
  780. if (ret != bdev_nr_zones(bdev)) {
  781. zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
  782. ret, bdev_nr_zones(bdev));
  783. return -EIO;
  784. }
  785. return 0;
  786. }
  787. static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd)
  788. {
  789. kvfree(zd->zones);
  790. }
  791. /*
  792. * Create a zone group and populate it with zone files.
  793. */
  794. static int zonefs_init_zgroup(struct super_block *sb,
  795. struct zonefs_zone_data *zd,
  796. enum zonefs_ztype ztype)
  797. {
  798. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  799. struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
  800. struct blk_zone *zone, *next, *end;
  801. struct zonefs_zone *z;
  802. unsigned int n = 0;
  803. int ret;
  804. /* Allocate the zone group. If it is empty, we have nothing to do. */
  805. if (!zgroup->g_nr_zones)
  806. return 0;
  807. zgroup->g_zones = kvzalloc_objs(struct zonefs_zone, zgroup->g_nr_zones);
  808. if (!zgroup->g_zones)
  809. return -ENOMEM;
  810. /*
  811. * Initialize the zone groups using the device zone information.
  812. * We always skip the first zone as it contains the super block
  813. * and is not use to back a file.
  814. */
  815. end = zd->zones + bdev_nr_zones(sb->s_bdev);
  816. for (zone = &zd->zones[1]; zone < end; zone = next) {
  817. next = zone + 1;
  818. if (zonefs_zone_type(zone) != ztype)
  819. continue;
  820. if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
  821. return -EINVAL;
  822. /*
  823. * For conventional zones, contiguous zones can be aggregated
  824. * together to form larger files. Note that this overwrites the
  825. * length of the first zone of the set of contiguous zones
  826. * aggregated together. If one offline or read-only zone is
  827. * found, assume that all zones aggregated have the same
  828. * condition.
  829. */
  830. if (ztype == ZONEFS_ZTYPE_CNV &&
  831. (sbi->s_features & ZONEFS_F_AGGRCNV)) {
  832. for (; next < end; next++) {
  833. if (zonefs_zone_type(next) != ztype)
  834. break;
  835. zone->len += next->len;
  836. zone->capacity += next->capacity;
  837. if (next->cond == BLK_ZONE_COND_READONLY &&
  838. zone->cond != BLK_ZONE_COND_OFFLINE)
  839. zone->cond = BLK_ZONE_COND_READONLY;
  840. else if (next->cond == BLK_ZONE_COND_OFFLINE)
  841. zone->cond = BLK_ZONE_COND_OFFLINE;
  842. }
  843. }
  844. z = &zgroup->g_zones[n];
  845. if (ztype == ZONEFS_ZTYPE_CNV)
  846. z->z_flags |= ZONEFS_ZONE_CNV;
  847. z->z_sector = zone->start;
  848. z->z_size = zone->len << SECTOR_SHIFT;
  849. if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
  850. !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
  851. zonefs_err(sb,
  852. "Invalid zone size %llu (device zone sectors %llu)\n",
  853. z->z_size,
  854. bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
  855. return -EINVAL;
  856. }
  857. z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
  858. zone->capacity << SECTOR_SHIFT);
  859. z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);
  860. z->z_mode = S_IFREG | sbi->s_perm;
  861. z->z_uid = sbi->s_uid;
  862. z->z_gid = sbi->s_gid;
  863. /*
  864. * Let zonefs_inode_update_mode() know that we will need
  865. * special initialization of the inode mode the first time
  866. * it is accessed.
  867. */
  868. z->z_flags |= ZONEFS_ZONE_INIT_MODE;
  869. sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
  870. sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
  871. sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;
  872. /*
  873. * For sequential zones, make sure that any open zone is closed
  874. * first to ensure that the initial number of open zones is 0,
  875. * in sync with the open zone accounting done when the mount
  876. * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
  877. */
  878. if (ztype == ZONEFS_ZTYPE_SEQ &&
  879. (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
  880. zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
  881. ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
  882. if (ret)
  883. return ret;
  884. }
  885. zonefs_account_active(sb, z);
  886. n++;
  887. }
  888. if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
  889. return -EINVAL;
  890. zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
  891. zonefs_zgroup_name(ztype),
  892. zgroup->g_nr_zones,
  893. str_plural(zgroup->g_nr_zones));
  894. return 0;
  895. }
  896. static void zonefs_free_zgroups(struct super_block *sb)
  897. {
  898. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  899. enum zonefs_ztype ztype;
  900. if (!sbi)
  901. return;
  902. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  903. kvfree(sbi->s_zgroup[ztype].g_zones);
  904. sbi->s_zgroup[ztype].g_zones = NULL;
  905. }
  906. }
  907. /*
  908. * Create a zone group and populate it with zone files.
  909. */
  910. static int zonefs_init_zgroups(struct super_block *sb)
  911. {
  912. struct zonefs_zone_data zd;
  913. enum zonefs_ztype ztype;
  914. int ret;
  915. /* First get the device zone information */
  916. memset(&zd, 0, sizeof(struct zonefs_zone_data));
  917. zd.sb = sb;
  918. ret = zonefs_get_zone_info(&zd);
  919. if (ret)
  920. goto cleanup;
  921. /* Allocate and initialize the zone groups */
  922. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  923. ret = zonefs_init_zgroup(sb, &zd, ztype);
  924. if (ret) {
  925. zonefs_info(sb,
  926. "Zone group \"%s\" initialization failed\n",
  927. zonefs_zgroup_name(ztype));
  928. break;
  929. }
  930. }
  931. cleanup:
  932. zonefs_free_zone_info(&zd);
  933. if (ret)
  934. zonefs_free_zgroups(sb);
  935. return ret;
  936. }
  937. /*
  938. * Read super block information from the device.
  939. */
  940. static int zonefs_read_super(struct super_block *sb)
  941. {
  942. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  943. struct zonefs_super *super;
  944. u32 crc, stored_crc;
  945. int ret;
  946. super = kmalloc(ZONEFS_SUPER_SIZE, GFP_KERNEL);
  947. if (!super)
  948. return -ENOMEM;
  949. ret = bdev_rw_virt(sb->s_bdev, 0, super, ZONEFS_SUPER_SIZE,
  950. REQ_OP_READ);
  951. if (ret)
  952. goto free_super;
  953. ret = -EINVAL;
  954. if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
  955. goto free_super;
  956. stored_crc = le32_to_cpu(super->s_crc);
  957. super->s_crc = 0;
  958. crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super));
  959. if (crc != stored_crc) {
  960. zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
  961. crc, stored_crc);
  962. goto free_super;
  963. }
  964. sbi->s_features = le64_to_cpu(super->s_features);
  965. if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
  966. zonefs_err(sb, "Unknown features set 0x%llx\n",
  967. sbi->s_features);
  968. goto free_super;
  969. }
  970. if (sbi->s_features & ZONEFS_F_UID) {
  971. sbi->s_uid = make_kuid(current_user_ns(),
  972. le32_to_cpu(super->s_uid));
  973. if (!uid_valid(sbi->s_uid)) {
  974. zonefs_err(sb, "Invalid UID feature\n");
  975. goto free_super;
  976. }
  977. }
  978. if (sbi->s_features & ZONEFS_F_GID) {
  979. sbi->s_gid = make_kgid(current_user_ns(),
  980. le32_to_cpu(super->s_gid));
  981. if (!gid_valid(sbi->s_gid)) {
  982. zonefs_err(sb, "Invalid GID feature\n");
  983. goto free_super;
  984. }
  985. }
  986. if (sbi->s_features & ZONEFS_F_PERM)
  987. sbi->s_perm = le32_to_cpu(super->s_perm);
  988. if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
  989. zonefs_err(sb, "Reserved area is being used\n");
  990. goto free_super;
  991. }
  992. import_uuid(&sbi->s_uuid, super->s_uuid);
  993. ret = 0;
  994. free_super:
  995. kfree(super);
  996. return ret;
  997. }
  998. static const struct super_operations zonefs_sops = {
  999. .alloc_inode = zonefs_alloc_inode,
  1000. .free_inode = zonefs_free_inode,
  1001. .statfs = zonefs_statfs,
  1002. .show_options = zonefs_show_options,
  1003. };
  1004. static int zonefs_get_zgroup_inodes(struct super_block *sb)
  1005. {
  1006. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  1007. struct inode *dir_inode;
  1008. enum zonefs_ztype ztype;
  1009. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  1010. if (!sbi->s_zgroup[ztype].g_nr_zones)
  1011. continue;
  1012. dir_inode = zonefs_get_zgroup_inode(sb, ztype);
  1013. if (IS_ERR(dir_inode))
  1014. return PTR_ERR(dir_inode);
  1015. sbi->s_zgroup[ztype].g_inode = dir_inode;
  1016. }
  1017. return 0;
  1018. }
  1019. static void zonefs_release_zgroup_inodes(struct super_block *sb)
  1020. {
  1021. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  1022. enum zonefs_ztype ztype;
  1023. if (!sbi)
  1024. return;
  1025. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  1026. if (sbi->s_zgroup[ztype].g_inode) {
  1027. iput(sbi->s_zgroup[ztype].g_inode);
  1028. sbi->s_zgroup[ztype].g_inode = NULL;
  1029. }
  1030. }
  1031. }
  1032. /*
  1033. * Check that the device is zoned. If it is, get the list of zones and create
  1034. * sub-directories and files according to the device zone configuration and
  1035. * format options.
  1036. */
  1037. static int zonefs_fill_super(struct super_block *sb, struct fs_context *fc)
  1038. {
  1039. struct zonefs_sb_info *sbi;
  1040. struct zonefs_context *ctx = fc->fs_private;
  1041. struct inode *inode;
  1042. enum zonefs_ztype ztype;
  1043. int ret;
  1044. if (!bdev_is_zoned(sb->s_bdev)) {
  1045. zonefs_err(sb, "Not a zoned block device\n");
  1046. return -EINVAL;
  1047. }
  1048. /*
  1049. * Initialize super block information: the maximum file size is updated
  1050. * when the zone files are created so that the format option
  1051. * ZONEFS_F_AGGRCNV which increases the maximum file size of a file
  1052. * beyond the zone size is taken into account.
  1053. */
  1054. sbi = kzalloc_obj(*sbi);
  1055. if (!sbi)
  1056. return -ENOMEM;
  1057. spin_lock_init(&sbi->s_lock);
  1058. sb->s_fs_info = sbi;
  1059. sb->s_magic = ZONEFS_MAGIC;
  1060. sb->s_maxbytes = 0;
  1061. sb->s_op = &zonefs_sops;
  1062. sb->s_time_gran = 1;
  1063. /*
  1064. * The block size is set to the device zone write granularity to ensure
  1065. * that write operations are always aligned according to the device
  1066. * interface constraints.
  1067. */
  1068. sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev));
  1069. sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev));
  1070. sbi->s_uid = GLOBAL_ROOT_UID;
  1071. sbi->s_gid = GLOBAL_ROOT_GID;
  1072. sbi->s_perm = 0640;
  1073. sbi->s_mount_opts = ctx->s_mount_opts;
  1074. atomic_set(&sbi->s_wro_seq_files, 0);
  1075. sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
  1076. atomic_set(&sbi->s_active_seq_files, 0);
  1077. sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
  1078. ret = zonefs_read_super(sb);
  1079. if (ret)
  1080. return ret;
  1081. zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
  1082. if (!sbi->s_max_wro_seq_files &&
  1083. !sbi->s_max_active_seq_files &&
  1084. sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
  1085. zonefs_info(sb,
  1086. "No open and active zone limits. Ignoring explicit_open mount option\n");
  1087. sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
  1088. }
  1089. /* Initialize the zone groups */
  1090. ret = zonefs_init_zgroups(sb);
  1091. if (ret)
  1092. goto cleanup;
  1093. /* Create the root directory inode */
  1094. ret = -ENOMEM;
  1095. inode = new_inode(sb);
  1096. if (!inode)
  1097. goto cleanup;
  1098. inode->i_ino = bdev_nr_zones(sb->s_bdev);
  1099. inode->i_mode = S_IFDIR | 0555;
  1100. simple_inode_init_ts(inode);
  1101. inode->i_op = &zonefs_dir_inode_operations;
  1102. inode->i_fop = &zonefs_dir_operations;
  1103. inode->i_size = 2;
  1104. set_nlink(inode, 2);
  1105. for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
  1106. if (sbi->s_zgroup[ztype].g_nr_zones) {
  1107. inc_nlink(inode);
  1108. inode->i_size++;
  1109. }
  1110. }
  1111. sb->s_root = d_make_root(inode);
  1112. if (!sb->s_root)
  1113. goto cleanup;
  1114. /*
  1115. * Take a reference on the zone groups directory inodes
  1116. * to keep them in the inode cache.
  1117. */
  1118. ret = zonefs_get_zgroup_inodes(sb);
  1119. if (ret)
  1120. goto cleanup;
  1121. ret = zonefs_sysfs_register(sb);
  1122. if (ret)
  1123. goto cleanup;
  1124. return 0;
  1125. cleanup:
  1126. zonefs_release_zgroup_inodes(sb);
  1127. zonefs_free_zgroups(sb);
  1128. return ret;
  1129. }
  1130. static void zonefs_kill_super(struct super_block *sb)
  1131. {
  1132. struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
  1133. /* Release the reference on the zone group directory inodes */
  1134. zonefs_release_zgroup_inodes(sb);
  1135. kill_block_super(sb);
  1136. zonefs_sysfs_unregister(sb);
  1137. zonefs_free_zgroups(sb);
  1138. kfree(sbi);
  1139. }
  1140. static void zonefs_free_fc(struct fs_context *fc)
  1141. {
  1142. struct zonefs_context *ctx = fc->fs_private;
  1143. kfree(ctx);
  1144. }
  1145. static int zonefs_get_tree(struct fs_context *fc)
  1146. {
  1147. return get_tree_bdev(fc, zonefs_fill_super);
  1148. }
  1149. static int zonefs_reconfigure(struct fs_context *fc)
  1150. {
  1151. struct zonefs_context *ctx = fc->fs_private;
  1152. struct super_block *sb = fc->root->d_sb;
  1153. struct zonefs_sb_info *sbi = sb->s_fs_info;
  1154. sync_filesystem(fc->root->d_sb);
  1155. /* Copy new options from ctx into sbi. */
  1156. sbi->s_mount_opts = ctx->s_mount_opts;
  1157. return 0;
  1158. }
  1159. static const struct fs_context_operations zonefs_context_ops = {
  1160. .parse_param = zonefs_parse_param,
  1161. .get_tree = zonefs_get_tree,
  1162. .reconfigure = zonefs_reconfigure,
  1163. .free = zonefs_free_fc,
  1164. };
  1165. /*
  1166. * Set up the filesystem mount context.
  1167. */
  1168. static int zonefs_init_fs_context(struct fs_context *fc)
  1169. {
  1170. struct zonefs_context *ctx;
  1171. ctx = kzalloc_obj(struct zonefs_context);
  1172. if (!ctx)
  1173. return -ENOMEM;
  1174. ctx->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
  1175. fc->ops = &zonefs_context_ops;
  1176. fc->fs_private = ctx;
  1177. return 0;
  1178. }
  1179. /*
  1180. * File system definition and registration.
  1181. */
  1182. static struct file_system_type zonefs_type = {
  1183. .owner = THIS_MODULE,
  1184. .name = "zonefs",
  1185. .kill_sb = zonefs_kill_super,
  1186. .fs_flags = FS_REQUIRES_DEV,
  1187. .init_fs_context = zonefs_init_fs_context,
  1188. .parameters = zonefs_param_spec,
  1189. };
  1190. static int __init zonefs_init_inodecache(void)
  1191. {
  1192. zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
  1193. sizeof(struct zonefs_inode_info), 0,
  1194. SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
  1195. NULL);
  1196. if (zonefs_inode_cachep == NULL)
  1197. return -ENOMEM;
  1198. return 0;
  1199. }
  1200. static void zonefs_destroy_inodecache(void)
  1201. {
  1202. /*
  1203. * Make sure all delayed rcu free inodes are flushed before we
  1204. * destroy the inode cache.
  1205. */
  1206. rcu_barrier();
  1207. kmem_cache_destroy(zonefs_inode_cachep);
  1208. }
  1209. static int __init zonefs_init(void)
  1210. {
  1211. int ret;
  1212. BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
  1213. ret = zonefs_init_inodecache();
  1214. if (ret)
  1215. return ret;
  1216. ret = zonefs_sysfs_init();
  1217. if (ret)
  1218. goto destroy_inodecache;
  1219. ret = register_filesystem(&zonefs_type);
  1220. if (ret)
  1221. goto sysfs_exit;
  1222. return 0;
  1223. sysfs_exit:
  1224. zonefs_sysfs_exit();
  1225. destroy_inodecache:
  1226. zonefs_destroy_inodecache();
  1227. return ret;
  1228. }
  1229. static void __exit zonefs_exit(void)
  1230. {
  1231. unregister_filesystem(&zonefs_type);
  1232. zonefs_sysfs_exit();
  1233. zonefs_destroy_inodecache();
  1234. }
  1235. MODULE_AUTHOR("Damien Le Moal");
  1236. MODULE_DESCRIPTION("Zone file system for zoned block devices");
  1237. MODULE_LICENSE("GPL");
  1238. MODULE_ALIAS_FS("zonefs");
  1239. module_init(zonefs_init);
  1240. module_exit(zonefs_exit);