kublk.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef KUBLK_INTERNAL_H
  3. #define KUBLK_INTERNAL_H
  4. #include <unistd.h>
  5. #include <stdlib.h>
  6. #include <assert.h>
  7. #include <stdio.h>
  8. #include <stdarg.h>
  9. #include <string.h>
  10. #include <pthread.h>
  11. #include <getopt.h>
  12. #include <limits.h>
  13. #include <poll.h>
  14. #include <fcntl.h>
  15. #include <sys/syscall.h>
  16. #include <sys/mman.h>
  17. #include <sys/ioctl.h>
  18. #include <sys/inotify.h>
  19. #include <sys/wait.h>
  20. #include <sys/eventfd.h>
  21. #include <sys/ipc.h>
  22. #include <sys/shm.h>
  23. #include <linux/io_uring.h>
  24. #include <liburing.h>
  25. #include <semaphore.h>
  26. /* allow ublk_dep.h to override ublk_cmd.h */
  27. #include "ublk_dep.h"
  28. #include <linux/ublk_cmd.h>
  29. #include "utils.h"
  30. #define MAX_BACK_FILES 4
  31. /****************** part 1: libublk ********************/
  32. #define CTRL_DEV "/dev/ublk-control"
  33. #define UBLKC_DEV "/dev/ublkc"
  34. #define UBLKB_DEV "/dev/ublkb"
  35. #define UBLK_CTRL_RING_DEPTH 32
  36. #define ERROR_EVTFD_DEVID -2
  37. #define UBLK_IO_MAX_BYTES (1 << 20)
  38. #define UBLK_MAX_QUEUES_SHIFT 5
  39. #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT)
  40. #define UBLK_MAX_THREADS_SHIFT 5
  41. #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT)
  42. #define UBLK_QUEUE_DEPTH 1024
  43. struct ublk_dev;
  44. struct ublk_queue;
  45. struct ublk_thread;
  46. struct stripe_ctx {
  47. /* stripe */
  48. unsigned int chunk_size;
  49. };
  50. struct fault_inject_ctx {
  51. /* fault_inject */
  52. unsigned long delay_us;
  53. };
  54. struct dev_ctx {
  55. char tgt_type[16];
  56. unsigned long flags;
  57. unsigned nr_hw_queues;
  58. unsigned short nthreads;
  59. unsigned queue_depth;
  60. int dev_id;
  61. int nr_files;
  62. char *files[MAX_BACK_FILES];
  63. unsigned int logging:1;
  64. unsigned int all:1;
  65. unsigned int fg:1;
  66. unsigned int recovery:1;
  67. unsigned int auto_zc_fallback:1;
  68. unsigned int per_io_tasks:1;
  69. unsigned int no_ublk_fixed_fd:1;
  70. unsigned int safe_stop:1;
  71. unsigned int no_auto_part_scan:1;
  72. __u32 integrity_flags;
  73. __u8 metadata_size;
  74. __u8 pi_offset;
  75. __u8 csum_type;
  76. __u8 tag_size;
  77. int _evtfd;
  78. int _shmid;
  79. /* built from shmem, only for ublk_dump_dev() */
  80. struct ublk_dev *shadow_dev;
  81. /* for 'update_size' command */
  82. unsigned long long size;
  83. union {
  84. struct stripe_ctx stripe;
  85. struct fault_inject_ctx fault_inject;
  86. };
  87. };
  88. struct ublk_ctrl_cmd_data {
  89. __u32 cmd_op;
  90. #define CTRL_CMD_HAS_DATA 1
  91. #define CTRL_CMD_HAS_BUF 2
  92. __u32 flags;
  93. __u64 data[2];
  94. __u64 addr;
  95. __u32 len;
  96. };
  97. struct ublk_io {
  98. char *buf_addr;
  99. void *integrity_buf;
  100. #define UBLKS_IO_NEED_FETCH_RQ (1UL << 0)
  101. #define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1)
  102. #define UBLKS_IO_FREE (1UL << 2)
  103. #define UBLKS_IO_NEED_GET_DATA (1UL << 3)
  104. #define UBLKS_IO_NEED_REG_BUF (1UL << 4)
  105. unsigned short flags;
  106. unsigned short refs; /* used by target code only */
  107. int tag;
  108. int result;
  109. unsigned short buf_index;
  110. unsigned short tgt_ios;
  111. void *private_data;
  112. };
  113. struct ublk_tgt_ops {
  114. const char *name;
  115. int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
  116. void (*deinit_tgt)(struct ublk_dev *);
  117. int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
  118. void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
  119. const struct io_uring_cqe *);
  120. /*
  121. * Target specific command line handling
  122. *
  123. * each option requires argument for target command line
  124. */
  125. void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
  126. void (*usage)(const struct ublk_tgt_ops *ops);
  127. /* return buffer index for UBLK_F_AUTO_BUF_REG */
  128. unsigned short (*buf_index)(const struct ublk_thread *t,
  129. const struct ublk_queue *, int tag);
  130. };
  131. struct ublk_tgt {
  132. unsigned long dev_size;
  133. unsigned int sq_depth;
  134. unsigned int cq_depth;
  135. const struct ublk_tgt_ops *ops;
  136. struct ublk_params params;
  137. int nr_backing_files;
  138. unsigned long backing_file_size[MAX_BACK_FILES];
  139. char backing_file[MAX_BACK_FILES][PATH_MAX];
  140. };
  141. struct ublk_queue {
  142. int q_id;
  143. int q_depth;
  144. struct ublk_dev *dev;
  145. const struct ublk_tgt_ops *tgt_ops;
  146. struct ublksrv_io_desc *io_cmd_buf;
  147. /* borrow three bit of ublk uapi flags, which may never be used */
  148. #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
  149. #define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62)
  150. #define UBLKS_Q_PREPARED (1ULL << 61)
  151. __u64 flags;
  152. int ublk_fd; /* cached ublk char device fd */
  153. __u8 metadata_size;
  154. struct ublk_io ios[UBLK_QUEUE_DEPTH];
  155. /* used for prep io commands */
  156. pthread_spinlock_t lock;
  157. };
  158. /* align with `ublk_elem_header` */
  159. struct ublk_batch_elem {
  160. __u16 tag;
  161. __u16 buf_index;
  162. __s32 result;
  163. __u64 buf_addr;
  164. };
  165. struct batch_commit_buf {
  166. unsigned short q_id;
  167. unsigned short buf_idx;
  168. void *elem;
  169. unsigned short done;
  170. unsigned short count;
  171. };
  172. struct batch_fetch_buf {
  173. struct io_uring_buf_ring *br;
  174. void *fetch_buf;
  175. unsigned int fetch_buf_size;
  176. unsigned int fetch_buf_off;
  177. };
  178. struct ublk_thread {
  179. /* Thread-local copy of queue-to-thread mapping for this thread */
  180. unsigned char q_map[UBLK_MAX_QUEUES];
  181. struct ublk_dev *dev;
  182. unsigned short idx;
  183. unsigned short nr_queues;
  184. #define UBLKS_T_STOPPING (1U << 0)
  185. #define UBLKS_T_IDLE (1U << 1)
  186. #define UBLKS_T_BATCH_IO (1U << 31) /* readonly */
  187. unsigned state;
  188. unsigned int cmd_inflight;
  189. unsigned int io_inflight;
  190. unsigned short nr_bufs;
  191. /* followings are for BATCH_IO */
  192. unsigned short commit_buf_start;
  193. unsigned char commit_buf_elem_size;
  194. /*
  195. * We just support single device, so pre-calculate commit/prep flags
  196. */
  197. unsigned short cmd_flags;
  198. unsigned int nr_commit_buf;
  199. unsigned int commit_buf_size;
  200. void *commit_buf;
  201. #define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
  202. struct allocator commit_buf_alloc;
  203. struct batch_commit_buf *commit;
  204. /* FETCH_IO_CMDS buffer */
  205. unsigned short nr_fetch_bufs;
  206. struct batch_fetch_buf *fetch;
  207. struct io_uring ring;
  208. };
  209. struct ublk_dev {
  210. struct ublk_tgt tgt;
  211. struct ublksrv_ctrl_dev_info dev_info;
  212. struct ublk_queue q[UBLK_MAX_QUEUES];
  213. unsigned nthreads;
  214. unsigned per_io_tasks;
  215. int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */
  216. int nr_fds;
  217. int ctrl_fd;
  218. struct io_uring ring;
  219. void *private_data;
  220. };
  221. extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
  222. static inline int __ublk_use_batch_io(__u64 flags)
  223. {
  224. return flags & UBLK_F_BATCH_IO;
  225. }
  226. static inline int ublk_queue_batch_io(const struct ublk_queue *q)
  227. {
  228. return __ublk_use_batch_io(q->flags);
  229. }
  230. static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
  231. {
  232. return __ublk_use_batch_io(dev->dev_info.flags);
  233. }
  234. /* only work for handle single device in this pthread context */
  235. static inline int ublk_thread_batch_io(const struct ublk_thread *t)
  236. {
  237. return t->state & UBLKS_T_BATCH_IO;
  238. }
  239. static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
  240. struct ublk_params *params)
  241. {
  242. if (!ctx->metadata_size)
  243. return;
  244. params->types |= UBLK_PARAM_TYPE_INTEGRITY;
  245. params->integrity = (struct ublk_param_integrity) {
  246. .flags = ctx->integrity_flags,
  247. .interval_exp = params->basic.logical_bs_shift,
  248. .metadata_size = ctx->metadata_size,
  249. .pi_offset = ctx->pi_offset,
  250. .csum_type = ctx->csum_type,
  251. .tag_size = ctx->tag_size,
  252. };
  253. }
  254. static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
  255. {
  256. /* All targets currently use interval_exp = logical_bs_shift = 9 */
  257. return (len >> 9) * q->metadata_size;
  258. }
  259. static inline size_t
  260. ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
  261. {
  262. return (integrity_len / q->metadata_size) << 9;
  263. }
  264. static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
  265. {
  266. return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
  267. }
  268. static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
  269. {
  270. return UBLKSRV_IO_BUF_OFFSET +
  271. ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
  272. }
  273. static inline int is_target_io(__u64 user_data)
  274. {
  275. return (user_data & (1ULL << 63)) != 0;
  276. }
  277. static inline __u64 build_user_data(unsigned tag, unsigned op,
  278. unsigned tgt_data, unsigned q_id, unsigned is_target_io)
  279. {
  280. /* we only have 7 bits to encode q_id */
  281. _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
  282. ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
  283. return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
  284. (__u64)q_id << 56 | (__u64)is_target_io << 63;
  285. }
  286. static inline unsigned int user_data_to_tag(__u64 user_data)
  287. {
  288. return user_data & 0xffff;
  289. }
  290. static inline unsigned int user_data_to_op(__u64 user_data)
  291. {
  292. return (user_data >> 16) & 0xff;
  293. }
  294. static inline unsigned int user_data_to_tgt_data(__u64 user_data)
  295. {
  296. return (user_data >> 24) & 0xffff;
  297. }
  298. static inline unsigned int user_data_to_q_id(__u64 user_data)
  299. {
  300. return (user_data >> 56) & 0x7f;
  301. }
  302. static inline unsigned short ublk_cmd_op_nr(unsigned int op)
  303. {
  304. return _IOC_NR(op);
  305. }
  306. static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
  307. {
  308. return container_of(io, struct ublk_queue, ios[io->tag]);
  309. }
  310. static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
  311. struct io_uring_sqe *sqes[], int nr_sqes)
  312. {
  313. struct io_uring *ring = &t->ring;
  314. unsigned left = io_uring_sq_space_left(ring);
  315. int i;
  316. if (left < nr_sqes)
  317. io_uring_submit(ring);
  318. for (i = 0; i < nr_sqes; i++) {
  319. sqes[i] = io_uring_get_sqe(ring);
  320. if (!sqes[i])
  321. return i;
  322. }
  323. return nr_sqes;
  324. }
  325. static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
  326. {
  327. if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
  328. if (fd_index == 0)
  329. /* Return the raw ublk FD for index 0 */
  330. return q->ublk_fd;
  331. /* Adjust index for backing files (index 1 becomes 0, etc.) */
  332. return fd_index - 1;
  333. }
  334. return fd_index;
  335. }
  336. static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
  337. struct ublk_queue *q, int tag, int q_id, __u64 index)
  338. {
  339. struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
  340. int dev_fd = ublk_get_registered_fd(q, 0);
  341. io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
  342. sqe->opcode = IORING_OP_URING_CMD;
  343. if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
  344. sqe->flags &= ~IOSQE_FIXED_FILE;
  345. else
  346. sqe->flags |= IOSQE_FIXED_FILE;
  347. cmd->tag = tag;
  348. cmd->addr = index;
  349. cmd->q_id = q_id;
  350. }
  351. static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
  352. struct ublk_queue *q, int tag, int q_id, __u64 index)
  353. {
  354. __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
  355. sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
  356. }
  357. static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
  358. struct ublk_queue *q, int tag, int q_id, __u64 index)
  359. {
  360. __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
  361. sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
  362. }
  363. static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
  364. {
  365. return (void *)&sqe->cmd;
  366. }
  367. static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
  368. {
  369. q->ios[tag].result = res;
  370. }
  371. static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
  372. {
  373. return q->ios[tag].result;
  374. }
  375. static inline void ublk_mark_io_done(struct ublk_io *io, int res)
  376. {
  377. io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
  378. io->result = res;
  379. }
  380. static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
  381. {
  382. return &q->io_cmd_buf[tag];
  383. }
  384. static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
  385. {
  386. __u32 *addr = (__u32 *)&sqe->off;
  387. addr[0] = cmd_op;
  388. addr[1] = 0;
  389. }
  390. static inline unsigned short ublk_batch_io_buf_idx(
  391. const struct ublk_thread *t, const struct ublk_queue *q,
  392. unsigned tag);
  393. static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
  394. const struct ublk_queue *q,
  395. unsigned tag)
  396. {
  397. if (ublk_queue_batch_io(q))
  398. return ublk_batch_io_buf_idx(t, q, tag);
  399. return q->ios[tag].buf_index;
  400. }
  401. static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
  402. {
  403. return &q->ios[tag];
  404. }
  405. static inline int ublk_completed_tgt_io(struct ublk_thread *t,
  406. struct ublk_queue *q, unsigned tag)
  407. {
  408. struct ublk_io *io = ublk_get_io(q, tag);
  409. t->io_inflight--;
  410. return --io->tgt_ios == 0;
  411. }
  412. static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
  413. {
  414. return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
  415. }
  416. static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
  417. {
  418. return !!(q->flags & UBLK_F_AUTO_BUF_REG);
  419. }
  420. static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
  421. {
  422. return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
  423. }
  424. static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
  425. {
  426. return !!(q->flags & UBLK_F_USER_COPY);
  427. }
  428. static inline int ublk_queue_no_buf(const struct ublk_queue *q)
  429. {
  430. return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q);
  431. }
  432. static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
  433. {
  434. return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
  435. }
  436. static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
  437. const struct ublk_queue *q)
  438. {
  439. unsigned char idx;
  440. idx = t->q_map[q->q_id];
  441. ublk_assert(idx != 0);
  442. return idx - 1;
  443. }
  444. /*
  445. * Each IO's buffer index has to be calculated by this helper for
  446. * UBLKS_T_BATCH_IO
  447. */
  448. static inline unsigned short ublk_batch_io_buf_idx(
  449. const struct ublk_thread *t, const struct ublk_queue *q,
  450. unsigned tag)
  451. {
  452. return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
  453. }
  454. /* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
  455. int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
  456. /* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
  457. void ublk_batch_start_fetch(struct ublk_thread *t);
  458. /* Handle completion of batch I/O commands (prep/commit) */
  459. void ublk_batch_compl_cmd(struct ublk_thread *t,
  460. const struct io_uring_cqe *cqe);
  461. /* Initialize batch I/O state and calculate buffer parameters */
  462. void ublk_batch_prepare(struct ublk_thread *t);
  463. /* Allocate and register commit buffers for batch operations */
  464. int ublk_batch_alloc_buf(struct ublk_thread *t);
  465. /* Free commit buffers and cleanup batch allocator */
  466. void ublk_batch_free_buf(struct ublk_thread *t);
  467. /* Prepare a new commit buffer for batching completed I/O operations */
  468. void ublk_batch_prep_commit(struct ublk_thread *t);
  469. /* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
  470. void ublk_batch_commit_io_cmds(struct ublk_thread *t);
  471. /* Add a completed I/O operation to the current batch commit buffer */
  472. void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
  473. unsigned tag, int res);
  474. void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
  475. int nthreads, int queues);
  476. static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
  477. unsigned tag, int res)
  478. {
  479. if (ublk_queue_batch_io(q)) {
  480. ublk_batch_complete_io(t, q, tag, res);
  481. return 0;
  482. } else {
  483. struct ublk_io *io = &q->ios[tag];
  484. ublk_mark_io_done(io, res);
  485. return ublk_queue_io_cmd(t, io);
  486. }
  487. }
  488. static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
  489. unsigned tag, int queued)
  490. {
  491. if (queued < 0)
  492. ublk_complete_io(t, q, tag, queued);
  493. else {
  494. struct ublk_io *io = ublk_get_io(q, tag);
  495. t->io_inflight += queued;
  496. io->tgt_ios = queued;
  497. io->result = 0;
  498. }
  499. }
  500. extern const struct ublk_tgt_ops null_tgt_ops;
  501. extern const struct ublk_tgt_ops loop_tgt_ops;
  502. extern const struct ublk_tgt_ops stripe_tgt_ops;
  503. extern const struct ublk_tgt_ops fault_inject_tgt_ops;
  504. void backing_file_tgt_deinit(struct ublk_dev *dev);
  505. int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);
  506. #endif