zloop.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2025, Christoph Hellwig.
  4. * Copyright (c) 2025, Western Digital Corporation or its affiliates.
  5. *
  6. * Zoned Loop Device driver - exports a zoned block device using one file per
  7. * zone as backing storage.
  8. */
  9. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10. #include <linux/module.h>
  11. #include <linux/blk-mq.h>
  12. #include <linux/blkzoned.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/miscdevice.h>
  15. #include <linux/falloc.h>
  16. #include <linux/mutex.h>
  17. #include <linux/parser.h>
  18. #include <linux/seq_file.h>
  /*
   * Options for adding (and removing) a device.
   *
   * Each option owns a distinct bit. The same values are used as the token
   * identifiers of zloop_opt_tokens, whose last entry uses ZLOOP_OPT_ERR (0).
   */
  enum {
  	ZLOOP_OPT_ERR			= 0,
  	ZLOOP_OPT_ID			= 1 << 0,
  	ZLOOP_OPT_CAPACITY		= 1 << 1,
  	ZLOOP_OPT_ZONE_SIZE		= 1 << 2,
  	ZLOOP_OPT_ZONE_CAPACITY		= 1 << 3,
  	ZLOOP_OPT_NR_CONV_ZONES		= 1 << 4,
  	ZLOOP_OPT_BASE_DIR		= 1 << 5,
  	ZLOOP_OPT_NR_QUEUES		= 1 << 6,
  	ZLOOP_OPT_QUEUE_DEPTH		= 1 << 7,
  	ZLOOP_OPT_BUFFERED_IO		= 1 << 8,
  	ZLOOP_OPT_ZONE_APPEND		= 1 << 9,
  	ZLOOP_OPT_ORDERED_ZONE_APPEND	= 1 << 10,
  };
  36. static const match_table_t zloop_opt_tokens = {
  37. { ZLOOP_OPT_ID, "id=%d" },
  38. { ZLOOP_OPT_CAPACITY, "capacity_mb=%u" },
  39. { ZLOOP_OPT_ZONE_SIZE, "zone_size_mb=%u" },
  40. { ZLOOP_OPT_ZONE_CAPACITY, "zone_capacity_mb=%u" },
  41. { ZLOOP_OPT_NR_CONV_ZONES, "conv_zones=%u" },
  42. { ZLOOP_OPT_BASE_DIR, "base_dir=%s" },
  43. { ZLOOP_OPT_NR_QUEUES, "nr_queues=%u" },
  44. { ZLOOP_OPT_QUEUE_DEPTH, "queue_depth=%u" },
  45. { ZLOOP_OPT_BUFFERED_IO, "buffered_io" },
  46. { ZLOOP_OPT_ZONE_APPEND, "zone_append=%u" },
  47. { ZLOOP_OPT_ORDERED_ZONE_APPEND, "ordered_zone_append" },
  48. { ZLOOP_OPT_ERR, NULL }
  49. };
  /*
   * Default values for the "add" operation.
   *
   * ZLOOP_DEF_ZONE_SIZE is 256 MiB expressed in 512-byte sectors.
   */
  #define ZLOOP_DEF_ID			-1
  #define ZLOOP_DEF_ZONE_SIZE		((256ULL * SZ_1M) >> SECTOR_SHIFT)
  #define ZLOOP_DEF_NR_ZONES		64
  #define ZLOOP_DEF_NR_CONV_ZONES		8
  #define ZLOOP_DEF_BASE_DIR		"/var/local/zloop"
  #define ZLOOP_DEF_NR_QUEUES		1
  #define ZLOOP_DEF_QUEUE_DEPTH		128
  #define ZLOOP_DEF_BUFFERED_IO		false
  #define ZLOOP_DEF_ZONE_APPEND		true
  #define ZLOOP_DEF_ORDERED_ZONE_APPEND	false

  /* Arbitrary limit on the zone size (16GB), expressed in MiB. */
  #define ZLOOP_MAX_ZONE_SIZE_MB		16384
  63. struct zloop_options {
  64. unsigned int mask;
  65. int id;
  66. sector_t capacity;
  67. sector_t zone_size;
  68. sector_t zone_capacity;
  69. unsigned int nr_conv_zones;
  70. char *base_dir;
  71. unsigned int nr_queues;
  72. unsigned int queue_depth;
  73. bool buffered_io;
  74. bool zone_append;
  75. bool ordered_zone_append;
  76. };
  /*
   * Device states.
   *
   * A device starts in Zlo_creating, can only be opened once it is Zlo_live,
   * and fails all newly queued requests once it is Zlo_deleting.
   */
  enum {
  	Zlo_creating	= 0,
  	Zlo_live	= 1,
  	Zlo_deleting	= 2,
  };
  /* Bit numbers used with the atomic bit operations on zloop_zone.flags. */
  enum zloop_zone_flags {
  	/* The zone is a conventional zone. */
  	ZLOOP_ZONE_CONV		= 0,
  	/*
  	 * The in-memory state of a sequential zone may not match its backing
  	 * file and must be refreshed with zloop_update_seq_zone().
  	 */
  	ZLOOP_ZONE_SEQ_ERROR	= 1,
  };
  89. struct zloop_zone {
  90. struct file *file;
  91. unsigned long flags;
  92. struct mutex lock;
  93. spinlock_t wp_lock;
  94. enum blk_zone_cond cond;
  95. sector_t start;
  96. sector_t wp;
  97. gfp_t old_gfp_mask;
  98. };
  99. struct zloop_device {
  100. unsigned int id;
  101. unsigned int state;
  102. struct blk_mq_tag_set tag_set;
  103. struct gendisk *disk;
  104. struct workqueue_struct *workqueue;
  105. bool buffered_io;
  106. bool zone_append;
  107. bool ordered_zone_append;
  108. const char *base_dir;
  109. struct file *data_dir;
  110. unsigned int zone_shift;
  111. sector_t zone_size;
  112. sector_t zone_capacity;
  113. unsigned int nr_zones;
  114. unsigned int nr_conv_zones;
  115. unsigned int block_size;
  116. struct zloop_zone zones[] __counted_by(nr_zones);
  117. };
  118. struct zloop_cmd {
  119. struct work_struct work;
  120. atomic_t ref;
  121. sector_t sector;
  122. sector_t nr_sectors;
  123. long ret;
  124. struct kiocb iocb;
  125. struct bio_vec *bvec;
  126. };
  /* NOTE(review): presumably maps device ids to devices - users are outside this view. */
  static DEFINE_IDR(zloop_index_idr);

  /* Held by zloop_open() while it checks the device state. */
  static DEFINE_MUTEX(zloop_ctl_mutex);
  129. static unsigned int rq_zone_no(struct request *rq)
  130. {
  131. struct zloop_device *zlo = rq->q->queuedata;
  132. return blk_rq_pos(rq) >> zlo->zone_shift;
  133. }
  134. static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
  135. {
  136. struct zloop_zone *zone = &zlo->zones[zone_no];
  137. struct kstat stat;
  138. sector_t file_sectors;
  139. unsigned long flags;
  140. int ret;
  141. lockdep_assert_held(&zone->lock);
  142. ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
  143. if (ret < 0) {
  144. pr_err("Failed to get zone %u file stat (err=%d)\n",
  145. zone_no, ret);
  146. set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  147. return ret;
  148. }
  149. file_sectors = stat.size >> SECTOR_SHIFT;
  150. if (file_sectors > zlo->zone_capacity) {
  151. pr_err("Zone %u file too large (%llu sectors > %llu)\n",
  152. zone_no, file_sectors, zlo->zone_capacity);
  153. return -EINVAL;
  154. }
  155. if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
  156. pr_err("Zone %u file size not aligned to block size %u\n",
  157. zone_no, zlo->block_size);
  158. return -EINVAL;
  159. }
  160. spin_lock_irqsave(&zone->wp_lock, flags);
  161. if (!file_sectors) {
  162. zone->cond = BLK_ZONE_COND_EMPTY;
  163. zone->wp = zone->start;
  164. } else if (file_sectors == zlo->zone_capacity) {
  165. zone->cond = BLK_ZONE_COND_FULL;
  166. zone->wp = ULLONG_MAX;
  167. } else {
  168. zone->cond = BLK_ZONE_COND_CLOSED;
  169. zone->wp = zone->start + file_sectors;
  170. }
  171. spin_unlock_irqrestore(&zone->wp_lock, flags);
  172. return 0;
  173. }
  174. static int zloop_open_zone(struct zloop_device *zlo, unsigned int zone_no)
  175. {
  176. struct zloop_zone *zone = &zlo->zones[zone_no];
  177. int ret = 0;
  178. if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
  179. return -EIO;
  180. mutex_lock(&zone->lock);
  181. if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
  182. ret = zloop_update_seq_zone(zlo, zone_no);
  183. if (ret)
  184. goto unlock;
  185. }
  186. switch (zone->cond) {
  187. case BLK_ZONE_COND_EXP_OPEN:
  188. break;
  189. case BLK_ZONE_COND_EMPTY:
  190. case BLK_ZONE_COND_CLOSED:
  191. case BLK_ZONE_COND_IMP_OPEN:
  192. zone->cond = BLK_ZONE_COND_EXP_OPEN;
  193. break;
  194. case BLK_ZONE_COND_FULL:
  195. default:
  196. ret = -EIO;
  197. break;
  198. }
  199. unlock:
  200. mutex_unlock(&zone->lock);
  201. return ret;
  202. }
  203. static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
  204. {
  205. struct zloop_zone *zone = &zlo->zones[zone_no];
  206. unsigned long flags;
  207. int ret = 0;
  208. if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
  209. return -EIO;
  210. mutex_lock(&zone->lock);
  211. if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
  212. ret = zloop_update_seq_zone(zlo, zone_no);
  213. if (ret)
  214. goto unlock;
  215. }
  216. switch (zone->cond) {
  217. case BLK_ZONE_COND_CLOSED:
  218. break;
  219. case BLK_ZONE_COND_IMP_OPEN:
  220. case BLK_ZONE_COND_EXP_OPEN:
  221. spin_lock_irqsave(&zone->wp_lock, flags);
  222. if (zone->wp == zone->start)
  223. zone->cond = BLK_ZONE_COND_EMPTY;
  224. else
  225. zone->cond = BLK_ZONE_COND_CLOSED;
  226. spin_unlock_irqrestore(&zone->wp_lock, flags);
  227. break;
  228. case BLK_ZONE_COND_EMPTY:
  229. case BLK_ZONE_COND_FULL:
  230. default:
  231. ret = -EIO;
  232. break;
  233. }
  234. unlock:
  235. mutex_unlock(&zone->lock);
  236. return ret;
  237. }
  238. static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
  239. {
  240. struct zloop_zone *zone = &zlo->zones[zone_no];
  241. unsigned long flags;
  242. int ret = 0;
  243. if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
  244. return -EIO;
  245. mutex_lock(&zone->lock);
  246. if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
  247. zone->cond == BLK_ZONE_COND_EMPTY)
  248. goto unlock;
  249. if (vfs_truncate(&zone->file->f_path, 0)) {
  250. set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  251. ret = -EIO;
  252. goto unlock;
  253. }
  254. spin_lock_irqsave(&zone->wp_lock, flags);
  255. zone->cond = BLK_ZONE_COND_EMPTY;
  256. zone->wp = zone->start;
  257. clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  258. spin_unlock_irqrestore(&zone->wp_lock, flags);
  259. unlock:
  260. mutex_unlock(&zone->lock);
  261. return ret;
  262. }
  263. static int zloop_reset_all_zones(struct zloop_device *zlo)
  264. {
  265. unsigned int i;
  266. int ret;
  267. for (i = zlo->nr_conv_zones; i < zlo->nr_zones; i++) {
  268. ret = zloop_reset_zone(zlo, i);
  269. if (ret)
  270. return ret;
  271. }
  272. return 0;
  273. }
  274. static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
  275. {
  276. struct zloop_zone *zone = &zlo->zones[zone_no];
  277. unsigned long flags;
  278. int ret = 0;
  279. if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
  280. return -EIO;
  281. mutex_lock(&zone->lock);
  282. if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
  283. zone->cond == BLK_ZONE_COND_FULL)
  284. goto unlock;
  285. if (vfs_truncate(&zone->file->f_path, zlo->zone_size << SECTOR_SHIFT)) {
  286. set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  287. ret = -EIO;
  288. goto unlock;
  289. }
  290. spin_lock_irqsave(&zone->wp_lock, flags);
  291. zone->cond = BLK_ZONE_COND_FULL;
  292. zone->wp = ULLONG_MAX;
  293. clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  294. spin_unlock_irqrestore(&zone->wp_lock, flags);
  295. unlock:
  296. mutex_unlock(&zone->lock);
  297. return ret;
  298. }
  299. static void zloop_put_cmd(struct zloop_cmd *cmd)
  300. {
  301. struct request *rq = blk_mq_rq_from_pdu(cmd);
  302. if (!atomic_dec_and_test(&cmd->ref))
  303. return;
  304. kfree(cmd->bvec);
  305. cmd->bvec = NULL;
  306. if (likely(!blk_should_fake_timeout(rq->q)))
  307. blk_mq_complete_request(rq);
  308. }
  309. static void zloop_rw_complete(struct kiocb *iocb, long ret)
  310. {
  311. struct zloop_cmd *cmd = container_of(iocb, struct zloop_cmd, iocb);
  312. cmd->ret = ret;
  313. zloop_put_cmd(cmd);
  314. }
  315. static void zloop_rw(struct zloop_cmd *cmd)
  316. {
  317. struct request *rq = blk_mq_rq_from_pdu(cmd);
  318. struct zloop_device *zlo = rq->q->queuedata;
  319. unsigned int zone_no = rq_zone_no(rq);
  320. sector_t sector = blk_rq_pos(rq);
  321. sector_t nr_sectors = blk_rq_sectors(rq);
  322. bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
  323. bool is_write = req_op(rq) == REQ_OP_WRITE || is_append;
  324. int rw = is_write ? ITER_SOURCE : ITER_DEST;
  325. struct req_iterator rq_iter;
  326. struct zloop_zone *zone;
  327. struct iov_iter iter;
  328. struct bio_vec tmp;
  329. unsigned long flags;
  330. sector_t zone_end;
  331. unsigned int nr_bvec;
  332. int ret;
  333. atomic_set(&cmd->ref, 2);
  334. cmd->sector = sector;
  335. cmd->nr_sectors = nr_sectors;
  336. cmd->ret = 0;
  337. if (WARN_ON_ONCE(is_append && !zlo->zone_append)) {
  338. ret = -EIO;
  339. goto out;
  340. }
  341. /* We should never get an I/O beyond the device capacity. */
  342. if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
  343. ret = -EIO;
  344. goto out;
  345. }
  346. zone = &zlo->zones[zone_no];
  347. zone_end = zone->start + zlo->zone_capacity;
  348. /*
  349. * The block layer should never send requests that are not fully
  350. * contained within the zone.
  351. */
  352. if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
  353. ret = -EIO;
  354. goto out;
  355. }
  356. if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
  357. mutex_lock(&zone->lock);
  358. ret = zloop_update_seq_zone(zlo, zone_no);
  359. mutex_unlock(&zone->lock);
  360. if (ret)
  361. goto out;
  362. }
  363. if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
  364. mutex_lock(&zone->lock);
  365. spin_lock_irqsave(&zone->wp_lock, flags);
  366. /*
  367. * Zone append operations always go at the current write
  368. * pointer, but regular write operations must already be
  369. * aligned to the write pointer when submitted.
  370. */
  371. if (is_append) {
  372. /*
  373. * If ordered zone append is in use, we already checked
  374. * and set the target sector in zloop_queue_rq().
  375. */
  376. if (!zlo->ordered_zone_append) {
  377. if (zone->cond == BLK_ZONE_COND_FULL ||
  378. zone->wp + nr_sectors > zone_end) {
  379. spin_unlock_irqrestore(&zone->wp_lock,
  380. flags);
  381. ret = -EIO;
  382. goto unlock;
  383. }
  384. sector = zone->wp;
  385. }
  386. cmd->sector = sector;
  387. } else if (sector != zone->wp) {
  388. spin_unlock_irqrestore(&zone->wp_lock, flags);
  389. pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
  390. zone_no, sector, zone->wp);
  391. ret = -EIO;
  392. goto unlock;
  393. }
  394. /* Implicitly open the target zone. */
  395. if (zone->cond == BLK_ZONE_COND_CLOSED ||
  396. zone->cond == BLK_ZONE_COND_EMPTY)
  397. zone->cond = BLK_ZONE_COND_IMP_OPEN;
  398. /*
  399. * Advance the write pointer, unless ordered zone append is in
  400. * use. If the write fails, the write pointer position will be
  401. * corrected when the next I/O starts execution.
  402. */
  403. if (!is_append || !zlo->ordered_zone_append) {
  404. zone->wp += nr_sectors;
  405. if (zone->wp == zone_end) {
  406. zone->cond = BLK_ZONE_COND_FULL;
  407. zone->wp = ULLONG_MAX;
  408. }
  409. }
  410. spin_unlock_irqrestore(&zone->wp_lock, flags);
  411. }
  412. nr_bvec = blk_rq_nr_bvec(rq);
  413. if (rq->bio != rq->biotail) {
  414. struct bio_vec *bvec;
  415. cmd->bvec = kmalloc_objs(*cmd->bvec, nr_bvec, GFP_NOIO);
  416. if (!cmd->bvec) {
  417. ret = -EIO;
  418. goto unlock;
  419. }
  420. /*
  421. * The bios of the request may be started from the middle of
  422. * the 'bvec' because of bio splitting, so we can't directly
  423. * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
  424. * API will take care of all details for us.
  425. */
  426. bvec = cmd->bvec;
  427. rq_for_each_bvec(tmp, rq, rq_iter) {
  428. *bvec = tmp;
  429. bvec++;
  430. }
  431. iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
  432. } else {
  433. /*
  434. * Same here, this bio may be started from the middle of the
  435. * 'bvec' because of bio splitting, so offset from the bvec
  436. * must be passed to iov iterator
  437. */
  438. iov_iter_bvec(&iter, rw,
  439. __bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
  440. nr_bvec, blk_rq_bytes(rq));
  441. iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
  442. }
  443. cmd->iocb.ki_pos = (sector - zone->start) << SECTOR_SHIFT;
  444. cmd->iocb.ki_filp = zone->file;
  445. cmd->iocb.ki_complete = zloop_rw_complete;
  446. if (!zlo->buffered_io)
  447. cmd->iocb.ki_flags = IOCB_DIRECT;
  448. cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
  449. if (rw == ITER_SOURCE)
  450. ret = zone->file->f_op->write_iter(&cmd->iocb, &iter);
  451. else
  452. ret = zone->file->f_op->read_iter(&cmd->iocb, &iter);
  453. unlock:
  454. if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write)
  455. mutex_unlock(&zone->lock);
  456. out:
  457. if (ret != -EIOCBQUEUED)
  458. zloop_rw_complete(&cmd->iocb, ret);
  459. zloop_put_cmd(cmd);
  460. }
  461. /*
  462. * Sync the entire FS containing the zone files instead of walking all files.
  463. */
  464. static int zloop_flush(struct zloop_device *zlo)
  465. {
  466. struct super_block *sb = file_inode(zlo->data_dir)->i_sb;
  467. int ret;
  468. down_read(&sb->s_umount);
  469. ret = sync_filesystem(sb);
  470. up_read(&sb->s_umount);
  471. return ret;
  472. }
  473. static void zloop_handle_cmd(struct zloop_cmd *cmd)
  474. {
  475. struct request *rq = blk_mq_rq_from_pdu(cmd);
  476. struct zloop_device *zlo = rq->q->queuedata;
  477. /* We can block in this context, so ignore REQ_NOWAIT. */
  478. if (rq->cmd_flags & REQ_NOWAIT)
  479. rq->cmd_flags &= ~REQ_NOWAIT;
  480. switch (req_op(rq)) {
  481. case REQ_OP_READ:
  482. case REQ_OP_WRITE:
  483. case REQ_OP_ZONE_APPEND:
  484. /*
  485. * zloop_rw() always executes asynchronously or completes
  486. * directly.
  487. */
  488. zloop_rw(cmd);
  489. return;
  490. case REQ_OP_FLUSH:
  491. cmd->ret = zloop_flush(zlo);
  492. break;
  493. case REQ_OP_ZONE_RESET:
  494. cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq));
  495. break;
  496. case REQ_OP_ZONE_RESET_ALL:
  497. cmd->ret = zloop_reset_all_zones(zlo);
  498. break;
  499. case REQ_OP_ZONE_FINISH:
  500. cmd->ret = zloop_finish_zone(zlo, rq_zone_no(rq));
  501. break;
  502. case REQ_OP_ZONE_OPEN:
  503. cmd->ret = zloop_open_zone(zlo, rq_zone_no(rq));
  504. break;
  505. case REQ_OP_ZONE_CLOSE:
  506. cmd->ret = zloop_close_zone(zlo, rq_zone_no(rq));
  507. break;
  508. default:
  509. WARN_ON_ONCE(1);
  510. pr_err("Unsupported operation %d\n", req_op(rq));
  511. cmd->ret = -EOPNOTSUPP;
  512. break;
  513. }
  514. blk_mq_complete_request(rq);
  515. }
  516. static void zloop_cmd_workfn(struct work_struct *work)
  517. {
  518. struct zloop_cmd *cmd = container_of(work, struct zloop_cmd, work);
  519. int orig_flags = current->flags;
  520. current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
  521. zloop_handle_cmd(cmd);
  522. current->flags = orig_flags;
  523. }
  524. static void zloop_complete_rq(struct request *rq)
  525. {
  526. struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
  527. struct zloop_device *zlo = rq->q->queuedata;
  528. unsigned int zone_no = cmd->sector >> zlo->zone_shift;
  529. struct zloop_zone *zone = &zlo->zones[zone_no];
  530. blk_status_t sts = BLK_STS_OK;
  531. switch (req_op(rq)) {
  532. case REQ_OP_READ:
  533. if (cmd->ret < 0)
  534. pr_err("Zone %u: failed read sector %llu, %llu sectors\n",
  535. zone_no, cmd->sector, cmd->nr_sectors);
  536. if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
  537. /* short read */
  538. struct bio *bio;
  539. __rq_for_each_bio(bio, rq)
  540. zero_fill_bio(bio);
  541. }
  542. break;
  543. case REQ_OP_WRITE:
  544. case REQ_OP_ZONE_APPEND:
  545. if (cmd->ret < 0)
  546. pr_err("Zone %u: failed %swrite sector %llu, %llu sectors\n",
  547. zone_no,
  548. req_op(rq) == REQ_OP_WRITE ? "" : "append ",
  549. cmd->sector, cmd->nr_sectors);
  550. if (cmd->ret >= 0 && cmd->ret != blk_rq_bytes(rq)) {
  551. pr_err("Zone %u: partial write %ld/%u B\n",
  552. zone_no, cmd->ret, blk_rq_bytes(rq));
  553. cmd->ret = -EIO;
  554. }
  555. if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
  556. /*
  557. * A write to a sequential zone file failed: mark the
  558. * zone as having an error. This will be corrected and
  559. * cleared when the next IO is submitted.
  560. */
  561. set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
  562. break;
  563. }
  564. if (req_op(rq) == REQ_OP_ZONE_APPEND)
  565. rq->__sector = cmd->sector;
  566. break;
  567. default:
  568. break;
  569. }
  570. if (cmd->ret < 0)
  571. sts = errno_to_blk_status(cmd->ret);
  572. blk_mq_end_request(rq, sts);
  573. }
  574. static bool zloop_set_zone_append_sector(struct request *rq)
  575. {
  576. struct zloop_device *zlo = rq->q->queuedata;
  577. unsigned int zone_no = rq_zone_no(rq);
  578. struct zloop_zone *zone = &zlo->zones[zone_no];
  579. sector_t zone_end = zone->start + zlo->zone_capacity;
  580. sector_t nr_sectors = blk_rq_sectors(rq);
  581. unsigned long flags;
  582. spin_lock_irqsave(&zone->wp_lock, flags);
  583. if (zone->cond == BLK_ZONE_COND_FULL ||
  584. zone->wp + nr_sectors > zone_end) {
  585. spin_unlock_irqrestore(&zone->wp_lock, flags);
  586. return false;
  587. }
  588. rq->__sector = zone->wp;
  589. zone->wp += blk_rq_sectors(rq);
  590. if (zone->wp >= zone_end) {
  591. zone->cond = BLK_ZONE_COND_FULL;
  592. zone->wp = ULLONG_MAX;
  593. }
  594. spin_unlock_irqrestore(&zone->wp_lock, flags);
  595. return true;
  596. }
  597. static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
  598. const struct blk_mq_queue_data *bd)
  599. {
  600. struct request *rq = bd->rq;
  601. struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
  602. struct zloop_device *zlo = rq->q->queuedata;
  603. if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
  604. return BLK_STS_IOERR;
  605. /*
  606. * If we need to strongly order zone append operations, set the request
  607. * sector to the zone write pointer location now instead of when the
  608. * command work runs.
  609. */
  610. if (zlo->ordered_zone_append && req_op(rq) == REQ_OP_ZONE_APPEND) {
  611. if (!zloop_set_zone_append_sector(rq))
  612. return BLK_STS_IOERR;
  613. }
  614. blk_mq_start_request(rq);
  615. INIT_WORK(&cmd->work, zloop_cmd_workfn);
  616. queue_work(zlo->workqueue, &cmd->work);
  617. return BLK_STS_OK;
  618. }
  619. static const struct blk_mq_ops zloop_mq_ops = {
  620. .queue_rq = zloop_queue_rq,
  621. .complete = zloop_complete_rq,
  622. };
  623. static int zloop_open(struct gendisk *disk, blk_mode_t mode)
  624. {
  625. struct zloop_device *zlo = disk->private_data;
  626. int ret;
  627. ret = mutex_lock_killable(&zloop_ctl_mutex);
  628. if (ret)
  629. return ret;
  630. if (zlo->state != Zlo_live)
  631. ret = -ENXIO;
  632. mutex_unlock(&zloop_ctl_mutex);
  633. return ret;
  634. }
  635. static int zloop_report_zones(struct gendisk *disk, sector_t sector,
  636. unsigned int nr_zones, struct blk_report_zones_args *args)
  637. {
  638. struct zloop_device *zlo = disk->private_data;
  639. struct blk_zone blkz = {};
  640. unsigned int first, i;
  641. unsigned long flags;
  642. int ret;
  643. first = disk_zone_no(disk, sector);
  644. if (first >= zlo->nr_zones)
  645. return 0;
  646. nr_zones = min(nr_zones, zlo->nr_zones - first);
  647. for (i = 0; i < nr_zones; i++) {
  648. unsigned int zone_no = first + i;
  649. struct zloop_zone *zone = &zlo->zones[zone_no];
  650. mutex_lock(&zone->lock);
  651. if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
  652. ret = zloop_update_seq_zone(zlo, zone_no);
  653. if (ret) {
  654. mutex_unlock(&zone->lock);
  655. return ret;
  656. }
  657. }
  658. blkz.start = zone->start;
  659. blkz.len = zlo->zone_size;
  660. spin_lock_irqsave(&zone->wp_lock, flags);
  661. blkz.wp = zone->wp;
  662. spin_unlock_irqrestore(&zone->wp_lock, flags);
  663. blkz.cond = zone->cond;
  664. if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
  665. blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
  666. blkz.capacity = zlo->zone_size;
  667. } else {
  668. blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
  669. blkz.capacity = zlo->zone_capacity;
  670. }
  671. mutex_unlock(&zone->lock);
  672. ret = disk_report_zone(disk, &blkz, i, args);
  673. if (ret)
  674. return ret;
  675. }
  676. return nr_zones;
  677. }
  678. static void zloop_free_disk(struct gendisk *disk)
  679. {
  680. struct zloop_device *zlo = disk->private_data;
  681. unsigned int i;
  682. blk_mq_free_tag_set(&zlo->tag_set);
  683. for (i = 0; i < zlo->nr_zones; i++) {
  684. struct zloop_zone *zone = &zlo->zones[i];
  685. mapping_set_gfp_mask(zone->file->f_mapping,
  686. zone->old_gfp_mask);
  687. fput(zone->file);
  688. }
  689. fput(zlo->data_dir);
  690. destroy_workqueue(zlo->workqueue);
  691. kfree(zlo->base_dir);
  692. kvfree(zlo);
  693. }
  694. static const struct block_device_operations zloop_fops = {
  695. .owner = THIS_MODULE,
  696. .open = zloop_open,
  697. .report_zones = zloop_report_zones,
  698. .free_disk = zloop_free_disk,
  699. };
  700. __printf(3, 4)
  701. static struct file *zloop_filp_open_fmt(int oflags, umode_t mode,
  702. const char *fmt, ...)
  703. {
  704. struct file *file;
  705. va_list ap;
  706. char *p;
  707. va_start(ap, fmt);
  708. p = kvasprintf(GFP_KERNEL, fmt, ap);
  709. va_end(ap);
  710. if (!p)
  711. return ERR_PTR(-ENOMEM);
  712. file = filp_open(p, oflags, mode);
  713. kfree(p);
  714. return file;
  715. }
  716. static int zloop_get_block_size(struct zloop_device *zlo,
  717. struct zloop_zone *zone)
  718. {
  719. struct block_device *sb_bdev = zone->file->f_mapping->host->i_sb->s_bdev;
  720. struct kstat st;
  721. /*
  722. * If the FS block size is lower than or equal to 4K, use that as the
  723. * device block size. Otherwise, fallback to the FS direct IO alignment
  724. * constraint if that is provided, and to the FS underlying device
  725. * physical block size if the direct IO alignment is unknown.
  726. */
  727. if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
  728. zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize;
  729. else if (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
  730. (st.result_mask & STATX_DIOALIGN))
  731. zlo->block_size = st.dio_offset_align;
  732. else if (sb_bdev)
  733. zlo->block_size = bdev_physical_block_size(sb_bdev);
  734. else
  735. zlo->block_size = SECTOR_SIZE;
  736. if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
  737. pr_err("Zone capacity is not aligned to block size %u\n",
  738. zlo->block_size);
  739. return -EINVAL;
  740. }
  741. return 0;
  742. }
  743. static int zloop_init_zone(struct zloop_device *zlo, struct zloop_options *opts,
  744. unsigned int zone_no, bool restore)
  745. {
  746. struct zloop_zone *zone = &zlo->zones[zone_no];
  747. int oflags = O_RDWR;
  748. struct kstat stat;
  749. sector_t file_sectors;
  750. int ret;
  751. mutex_init(&zone->lock);
  752. spin_lock_init(&zone->wp_lock);
  753. zone->start = (sector_t)zone_no << zlo->zone_shift;
  754. if (!restore)
  755. oflags |= O_CREAT;
  756. if (!opts->buffered_io)
  757. oflags |= O_DIRECT;
  758. if (zone_no < zlo->nr_conv_zones) {
  759. /* Conventional zone file. */
  760. set_bit(ZLOOP_ZONE_CONV, &zone->flags);
  761. zone->cond = BLK_ZONE_COND_NOT_WP;
  762. zone->wp = U64_MAX;
  763. zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/cnv-%06u",
  764. zlo->base_dir, zlo->id, zone_no);
  765. if (IS_ERR(zone->file)) {
  766. pr_err("Failed to open zone %u file %s/%u/cnv-%06u (err=%ld)",
  767. zone_no, zlo->base_dir, zlo->id, zone_no,
  768. PTR_ERR(zone->file));
  769. return PTR_ERR(zone->file);
  770. }
  771. if (!zlo->block_size) {
  772. ret = zloop_get_block_size(zlo, zone);
  773. if (ret)
  774. return ret;
  775. }
  776. ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0);
  777. if (ret < 0) {
  778. pr_err("Failed to get zone %u file stat\n", zone_no);
  779. return ret;
  780. }
  781. file_sectors = stat.size >> SECTOR_SHIFT;
  782. if (restore && file_sectors != zlo->zone_size) {
  783. pr_err("Invalid conventional zone %u file size (%llu sectors != %llu)\n",
  784. zone_no, file_sectors, zlo->zone_capacity);
  785. return ret;
  786. }
  787. ret = vfs_truncate(&zone->file->f_path,
  788. zlo->zone_size << SECTOR_SHIFT);
  789. if (ret < 0) {
  790. pr_err("Failed to truncate zone %u file (err=%d)\n",
  791. zone_no, ret);
  792. return ret;
  793. }
  794. return 0;
  795. }
  796. /* Sequential zone file. */
  797. zone->file = zloop_filp_open_fmt(oflags, 0600, "%s/%u/seq-%06u",
  798. zlo->base_dir, zlo->id, zone_no);
  799. if (IS_ERR(zone->file)) {
  800. pr_err("Failed to open zone %u file %s/%u/seq-%06u (err=%ld)",
  801. zone_no, zlo->base_dir, zlo->id, zone_no,
  802. PTR_ERR(zone->file));
  803. return PTR_ERR(zone->file);
  804. }
  805. if (!zlo->block_size) {
  806. ret = zloop_get_block_size(zlo, zone);
  807. if (ret)
  808. return ret;
  809. }
  810. zloop_get_block_size(zlo, zone);
  811. mutex_lock(&zone->lock);
  812. ret = zloop_update_seq_zone(zlo, zone_no);
  813. mutex_unlock(&zone->lock);
  814. return ret;
  815. }
  816. static bool zloop_dev_exists(struct zloop_device *zlo)
  817. {
  818. struct file *cnv, *seq;
  819. bool exists;
  820. cnv = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/cnv-%06u",
  821. zlo->base_dir, zlo->id, 0);
  822. seq = zloop_filp_open_fmt(O_RDONLY, 0600, "%s/%u/seq-%06u",
  823. zlo->base_dir, zlo->id, 0);
  824. exists = !IS_ERR(cnv) || !IS_ERR(seq);
  825. if (!IS_ERR(cnv))
  826. fput(cnv);
  827. if (!IS_ERR(seq))
  828. fput(seq);
  829. return exists;
  830. }
  831. static int zloop_ctl_add(struct zloop_options *opts)
  832. {
  833. struct queue_limits lim = {
  834. .max_hw_sectors = SZ_1M >> SECTOR_SHIFT,
  835. .chunk_sectors = opts->zone_size,
  836. .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE,
  837. };
  838. unsigned int nr_zones, i, j;
  839. struct zloop_device *zlo;
  840. int ret = -EINVAL;
  841. bool restore;
  842. __module_get(THIS_MODULE);
  843. nr_zones = opts->capacity >> ilog2(opts->zone_size);
  844. if (opts->nr_conv_zones >= nr_zones) {
  845. pr_err("Invalid number of conventional zones %u\n",
  846. opts->nr_conv_zones);
  847. goto out;
  848. }
  849. zlo = kvzalloc_flex(*zlo, zones, nr_zones);
  850. if (!zlo) {
  851. ret = -ENOMEM;
  852. goto out;
  853. }
  854. WRITE_ONCE(zlo->state, Zlo_creating);
  855. ret = mutex_lock_killable(&zloop_ctl_mutex);
  856. if (ret)
  857. goto out_free_dev;
  858. /* Allocate id, if @opts->id >= 0, we're requesting that specific id */
  859. if (opts->id >= 0) {
  860. ret = idr_alloc(&zloop_index_idr, zlo,
  861. opts->id, opts->id + 1, GFP_KERNEL);
  862. if (ret == -ENOSPC)
  863. ret = -EEXIST;
  864. } else {
  865. ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
  866. }
  867. mutex_unlock(&zloop_ctl_mutex);
  868. if (ret < 0)
  869. goto out_free_dev;
  870. zlo->id = ret;
  871. zlo->zone_shift = ilog2(opts->zone_size);
  872. zlo->zone_size = opts->zone_size;
  873. if (opts->zone_capacity)
  874. zlo->zone_capacity = opts->zone_capacity;
  875. else
  876. zlo->zone_capacity = zlo->zone_size;
  877. zlo->nr_zones = nr_zones;
  878. zlo->nr_conv_zones = opts->nr_conv_zones;
  879. zlo->buffered_io = opts->buffered_io;
  880. zlo->zone_append = opts->zone_append;
  881. if (zlo->zone_append)
  882. zlo->ordered_zone_append = opts->ordered_zone_append;
  883. zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
  884. opts->nr_queues * opts->queue_depth, zlo->id);
  885. if (!zlo->workqueue) {
  886. ret = -ENOMEM;
  887. goto out_free_idr;
  888. }
  889. if (opts->base_dir)
  890. zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL);
  891. else
  892. zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL);
  893. if (!zlo->base_dir) {
  894. ret = -ENOMEM;
  895. goto out_destroy_workqueue;
  896. }
  897. zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
  898. zlo->base_dir, zlo->id);
  899. if (IS_ERR(zlo->data_dir)) {
  900. ret = PTR_ERR(zlo->data_dir);
  901. pr_warn("Failed to open directory %s/%u (err=%d)\n",
  902. zlo->base_dir, zlo->id, ret);
  903. goto out_free_base_dir;
  904. }
  905. /*
  906. * If we already have zone files, we are restoring a device created by a
  907. * previous add operation. In this case, zloop_init_zone() will check
  908. * that the zone files are consistent with the zone configuration given.
  909. */
  910. restore = zloop_dev_exists(zlo);
  911. for (i = 0; i < nr_zones; i++) {
  912. ret = zloop_init_zone(zlo, opts, i, restore);
  913. if (ret)
  914. goto out_close_files;
  915. }
  916. lim.physical_block_size = zlo->block_size;
  917. lim.logical_block_size = zlo->block_size;
  918. if (zlo->zone_append)
  919. lim.max_hw_zone_append_sectors = lim.max_hw_sectors;
  920. zlo->tag_set.ops = &zloop_mq_ops;
  921. zlo->tag_set.nr_hw_queues = opts->nr_queues;
  922. zlo->tag_set.queue_depth = opts->queue_depth;
  923. zlo->tag_set.numa_node = NUMA_NO_NODE;
  924. zlo->tag_set.cmd_size = sizeof(struct zloop_cmd);
  925. zlo->tag_set.driver_data = zlo;
  926. ret = blk_mq_alloc_tag_set(&zlo->tag_set);
  927. if (ret) {
  928. pr_err("blk_mq_alloc_tag_set failed (err=%d)\n", ret);
  929. goto out_close_files;
  930. }
  931. zlo->disk = blk_mq_alloc_disk(&zlo->tag_set, &lim, zlo);
  932. if (IS_ERR(zlo->disk)) {
  933. pr_err("blk_mq_alloc_disk failed (err=%d)\n", ret);
  934. ret = PTR_ERR(zlo->disk);
  935. goto out_cleanup_tags;
  936. }
  937. zlo->disk->flags = GENHD_FL_NO_PART;
  938. zlo->disk->fops = &zloop_fops;
  939. zlo->disk->private_data = zlo;
  940. sprintf(zlo->disk->disk_name, "zloop%d", zlo->id);
  941. set_capacity(zlo->disk, (u64)lim.chunk_sectors * zlo->nr_zones);
  942. ret = blk_revalidate_disk_zones(zlo->disk);
  943. if (ret)
  944. goto out_cleanup_disk;
  945. ret = add_disk(zlo->disk);
  946. if (ret) {
  947. pr_err("add_disk failed (err=%d)\n", ret);
  948. goto out_cleanup_disk;
  949. }
  950. mutex_lock(&zloop_ctl_mutex);
  951. WRITE_ONCE(zlo->state, Zlo_live);
  952. mutex_unlock(&zloop_ctl_mutex);
  953. pr_info("zloop: device %d, %u zones of %llu MiB, %u B block size\n",
  954. zlo->id, zlo->nr_zones,
  955. ((sector_t)zlo->zone_size << SECTOR_SHIFT) >> 20,
  956. zlo->block_size);
  957. pr_info("zloop%d: using %s%s zone append\n",
  958. zlo->id,
  959. zlo->ordered_zone_append ? "ordered " : "",
  960. zlo->zone_append ? "native" : "emulated");
  961. return 0;
  962. out_cleanup_disk:
  963. put_disk(zlo->disk);
  964. out_cleanup_tags:
  965. blk_mq_free_tag_set(&zlo->tag_set);
  966. out_close_files:
  967. for (j = 0; j < i; j++) {
  968. struct zloop_zone *zone = &zlo->zones[j];
  969. if (!IS_ERR_OR_NULL(zone->file))
  970. fput(zone->file);
  971. }
  972. fput(zlo->data_dir);
  973. out_free_base_dir:
  974. kfree(zlo->base_dir);
  975. out_destroy_workqueue:
  976. destroy_workqueue(zlo->workqueue);
  977. out_free_idr:
  978. mutex_lock(&zloop_ctl_mutex);
  979. idr_remove(&zloop_index_idr, zlo->id);
  980. mutex_unlock(&zloop_ctl_mutex);
  981. out_free_dev:
  982. kvfree(zlo);
  983. out:
  984. module_put(THIS_MODULE);
  985. if (ret == -ENOENT)
  986. ret = -EINVAL;
  987. return ret;
  988. }
  989. static int zloop_ctl_remove(struct zloop_options *opts)
  990. {
  991. struct zloop_device *zlo;
  992. int ret;
  993. if (!(opts->mask & ZLOOP_OPT_ID)) {
  994. pr_err("No ID specified for remove\n");
  995. return -EINVAL;
  996. }
  997. if (opts->mask & ~ZLOOP_OPT_ID) {
  998. pr_err("Invalid option specified for remove\n");
  999. return -EINVAL;
  1000. }
  1001. ret = mutex_lock_killable(&zloop_ctl_mutex);
  1002. if (ret)
  1003. return ret;
  1004. zlo = idr_find(&zloop_index_idr, opts->id);
  1005. if (!zlo || zlo->state == Zlo_creating) {
  1006. ret = -ENODEV;
  1007. } else if (zlo->state == Zlo_deleting) {
  1008. ret = -EINVAL;
  1009. } else {
  1010. idr_remove(&zloop_index_idr, zlo->id);
  1011. WRITE_ONCE(zlo->state, Zlo_deleting);
  1012. }
  1013. mutex_unlock(&zloop_ctl_mutex);
  1014. if (ret)
  1015. return ret;
  1016. del_gendisk(zlo->disk);
  1017. put_disk(zlo->disk);
  1018. pr_info("Removed device %d\n", opts->id);
  1019. module_put(THIS_MODULE);
  1020. return 0;
  1021. }
  1022. static int zloop_parse_options(struct zloop_options *opts, const char *buf)
  1023. {
  1024. substring_t args[MAX_OPT_ARGS];
  1025. char *options, *o, *p;
  1026. unsigned int token;
  1027. int ret = 0;
  1028. /* Set defaults. */
  1029. opts->mask = 0;
  1030. opts->id = ZLOOP_DEF_ID;
  1031. opts->capacity = ZLOOP_DEF_ZONE_SIZE * ZLOOP_DEF_NR_ZONES;
  1032. opts->zone_size = ZLOOP_DEF_ZONE_SIZE;
  1033. opts->nr_conv_zones = ZLOOP_DEF_NR_CONV_ZONES;
  1034. opts->nr_queues = ZLOOP_DEF_NR_QUEUES;
  1035. opts->queue_depth = ZLOOP_DEF_QUEUE_DEPTH;
  1036. opts->buffered_io = ZLOOP_DEF_BUFFERED_IO;
  1037. opts->zone_append = ZLOOP_DEF_ZONE_APPEND;
  1038. opts->ordered_zone_append = ZLOOP_DEF_ORDERED_ZONE_APPEND;
  1039. if (!buf)
  1040. return 0;
  1041. /* Skip leading spaces before the options. */
  1042. while (isspace(*buf))
  1043. buf++;
  1044. options = o = kstrdup(buf, GFP_KERNEL);
  1045. if (!options)
  1046. return -ENOMEM;
  1047. /* Parse the options, doing only some light invalid value checks. */
  1048. while ((p = strsep(&o, ",\n")) != NULL) {
  1049. if (!*p)
  1050. continue;
  1051. token = match_token(p, zloop_opt_tokens, args);
  1052. opts->mask |= token;
  1053. switch (token) {
  1054. case ZLOOP_OPT_ID:
  1055. if (match_int(args, &opts->id)) {
  1056. ret = -EINVAL;
  1057. goto out;
  1058. }
  1059. break;
  1060. case ZLOOP_OPT_CAPACITY:
  1061. if (match_uint(args, &token)) {
  1062. ret = -EINVAL;
  1063. goto out;
  1064. }
  1065. if (!token) {
  1066. pr_err("Invalid capacity\n");
  1067. ret = -EINVAL;
  1068. goto out;
  1069. }
  1070. opts->capacity =
  1071. ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
  1072. break;
  1073. case ZLOOP_OPT_ZONE_SIZE:
  1074. if (match_uint(args, &token)) {
  1075. ret = -EINVAL;
  1076. goto out;
  1077. }
  1078. if (!token || token > ZLOOP_MAX_ZONE_SIZE_MB ||
  1079. !is_power_of_2(token)) {
  1080. pr_err("Invalid zone size %u\n", token);
  1081. ret = -EINVAL;
  1082. goto out;
  1083. }
  1084. opts->zone_size =
  1085. ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
  1086. break;
  1087. case ZLOOP_OPT_ZONE_CAPACITY:
  1088. if (match_uint(args, &token)) {
  1089. ret = -EINVAL;
  1090. goto out;
  1091. }
  1092. if (!token) {
  1093. pr_err("Invalid zone capacity\n");
  1094. ret = -EINVAL;
  1095. goto out;
  1096. }
  1097. opts->zone_capacity =
  1098. ((sector_t)token * SZ_1M) >> SECTOR_SHIFT;
  1099. break;
  1100. case ZLOOP_OPT_NR_CONV_ZONES:
  1101. if (match_uint(args, &token)) {
  1102. ret = -EINVAL;
  1103. goto out;
  1104. }
  1105. opts->nr_conv_zones = token;
  1106. break;
  1107. case ZLOOP_OPT_BASE_DIR:
  1108. p = match_strdup(args);
  1109. if (!p) {
  1110. ret = -ENOMEM;
  1111. goto out;
  1112. }
  1113. kfree(opts->base_dir);
  1114. opts->base_dir = p;
  1115. break;
  1116. case ZLOOP_OPT_NR_QUEUES:
  1117. if (match_uint(args, &token)) {
  1118. ret = -EINVAL;
  1119. goto out;
  1120. }
  1121. if (!token) {
  1122. pr_err("Invalid number of queues\n");
  1123. ret = -EINVAL;
  1124. goto out;
  1125. }
  1126. opts->nr_queues = min(token, num_online_cpus());
  1127. break;
  1128. case ZLOOP_OPT_QUEUE_DEPTH:
  1129. if (match_uint(args, &token)) {
  1130. ret = -EINVAL;
  1131. goto out;
  1132. }
  1133. if (!token) {
  1134. pr_err("Invalid queue depth\n");
  1135. ret = -EINVAL;
  1136. goto out;
  1137. }
  1138. opts->queue_depth = token;
  1139. break;
  1140. case ZLOOP_OPT_BUFFERED_IO:
  1141. opts->buffered_io = true;
  1142. break;
  1143. case ZLOOP_OPT_ZONE_APPEND:
  1144. if (match_uint(args, &token)) {
  1145. ret = -EINVAL;
  1146. goto out;
  1147. }
  1148. if (token != 0 && token != 1) {
  1149. pr_err("Invalid zone_append value\n");
  1150. ret = -EINVAL;
  1151. goto out;
  1152. }
  1153. opts->zone_append = token;
  1154. break;
  1155. case ZLOOP_OPT_ORDERED_ZONE_APPEND:
  1156. opts->ordered_zone_append = true;
  1157. break;
  1158. case ZLOOP_OPT_ERR:
  1159. default:
  1160. pr_warn("unknown parameter or missing value '%s'\n", p);
  1161. ret = -EINVAL;
  1162. goto out;
  1163. }
  1164. }
  1165. ret = -EINVAL;
  1166. if (opts->capacity <= opts->zone_size) {
  1167. pr_err("Invalid capacity\n");
  1168. goto out;
  1169. }
  1170. if (opts->zone_capacity > opts->zone_size) {
  1171. pr_err("Invalid zone capacity\n");
  1172. goto out;
  1173. }
  1174. ret = 0;
  1175. out:
  1176. kfree(options);
  1177. return ret;
  1178. }
  1179. enum {
  1180. ZLOOP_CTL_ADD,
  1181. ZLOOP_CTL_REMOVE,
  1182. };
  1183. static struct zloop_ctl_op {
  1184. int code;
  1185. const char *name;
  1186. } zloop_ctl_ops[] = {
  1187. { ZLOOP_CTL_ADD, "add" },
  1188. { ZLOOP_CTL_REMOVE, "remove" },
  1189. { -1, NULL },
  1190. };
  1191. static ssize_t zloop_ctl_write(struct file *file, const char __user *ubuf,
  1192. size_t count, loff_t *pos)
  1193. {
  1194. struct zloop_options opts = { };
  1195. struct zloop_ctl_op *op;
  1196. const char *buf, *opts_buf;
  1197. int i, ret;
  1198. if (count > PAGE_SIZE)
  1199. return -ENOMEM;
  1200. buf = memdup_user_nul(ubuf, count);
  1201. if (IS_ERR(buf))
  1202. return PTR_ERR(buf);
  1203. for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
  1204. op = &zloop_ctl_ops[i];
  1205. if (!op->name) {
  1206. pr_err("Invalid operation\n");
  1207. ret = -EINVAL;
  1208. goto out;
  1209. }
  1210. if (!strncmp(buf, op->name, strlen(op->name)))
  1211. break;
  1212. }
  1213. if (count <= strlen(op->name))
  1214. opts_buf = NULL;
  1215. else
  1216. opts_buf = buf + strlen(op->name);
  1217. ret = zloop_parse_options(&opts, opts_buf);
  1218. if (ret) {
  1219. pr_err("Failed to parse options\n");
  1220. goto out;
  1221. }
  1222. switch (op->code) {
  1223. case ZLOOP_CTL_ADD:
  1224. ret = zloop_ctl_add(&opts);
  1225. break;
  1226. case ZLOOP_CTL_REMOVE:
  1227. ret = zloop_ctl_remove(&opts);
  1228. break;
  1229. default:
  1230. pr_err("Invalid operation\n");
  1231. ret = -EINVAL;
  1232. goto out;
  1233. }
  1234. out:
  1235. kfree(opts.base_dir);
  1236. kfree(buf);
  1237. return ret ? ret : count;
  1238. }
  1239. static int zloop_ctl_show(struct seq_file *seq_file, void *private)
  1240. {
  1241. const struct match_token *tok;
  1242. int i;
  1243. /* Add operation */
  1244. seq_printf(seq_file, "%s ", zloop_ctl_ops[0].name);
  1245. for (i = 0; i < ARRAY_SIZE(zloop_opt_tokens); i++) {
  1246. tok = &zloop_opt_tokens[i];
  1247. if (!tok->pattern)
  1248. break;
  1249. if (i)
  1250. seq_putc(seq_file, ',');
  1251. seq_puts(seq_file, tok->pattern);
  1252. }
  1253. seq_putc(seq_file, '\n');
  1254. /* Remove operation */
  1255. seq_puts(seq_file, zloop_ctl_ops[1].name);
  1256. seq_puts(seq_file, " id=%d\n");
  1257. return 0;
  1258. }
  1259. static int zloop_ctl_open(struct inode *inode, struct file *file)
  1260. {
  1261. file->private_data = NULL;
  1262. return single_open(file, zloop_ctl_show, NULL);
  1263. }
  1264. static int zloop_ctl_release(struct inode *inode, struct file *file)
  1265. {
  1266. return single_release(inode, file);
  1267. }
  1268. static const struct file_operations zloop_ctl_fops = {
  1269. .owner = THIS_MODULE,
  1270. .open = zloop_ctl_open,
  1271. .release = zloop_ctl_release,
  1272. .write = zloop_ctl_write,
  1273. .read = seq_read,
  1274. };
  1275. static struct miscdevice zloop_misc = {
  1276. .minor = MISC_DYNAMIC_MINOR,
  1277. .name = "zloop-control",
  1278. .fops = &zloop_ctl_fops,
  1279. };
  1280. static int __init zloop_init(void)
  1281. {
  1282. int ret;
  1283. ret = misc_register(&zloop_misc);
  1284. if (ret) {
  1285. pr_err("Failed to register misc device: %d\n", ret);
  1286. return ret;
  1287. }
  1288. pr_info("Module loaded\n");
  1289. return 0;
  1290. }
  1291. static void __exit zloop_exit(void)
  1292. {
  1293. misc_deregister(&zloop_misc);
  1294. idr_destroy(&zloop_index_idr);
  1295. }
  1296. module_init(zloop_init);
  1297. module_exit(zloop_exit);
  1298. MODULE_DESCRIPTION("Zoned loopback device");
  1299. MODULE_LICENSE("GPL");