| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610 |
- /* SPDX-License-Identifier: GPL-2.0 */
- #ifndef KUBLK_INTERNAL_H
- #define KUBLK_INTERNAL_H
- #include <unistd.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <string.h>
- #include <pthread.h>
- #include <getopt.h>
- #include <limits.h>
- #include <poll.h>
- #include <fcntl.h>
- #include <sys/syscall.h>
- #include <sys/mman.h>
- #include <sys/ioctl.h>
- #include <sys/inotify.h>
- #include <sys/wait.h>
- #include <sys/eventfd.h>
- #include <sys/ipc.h>
- #include <sys/shm.h>
- #include <linux/io_uring.h>
- #include <liburing.h>
- #include <semaphore.h>
- /* allow ublk_dep.h to override ublk_cmd.h */
- #include "ublk_dep.h"
- #include <linux/ublk_cmd.h>
- #include "utils.h"
/* Upper bound for the per-device backing file arrays */
#define MAX_BACK_FILES			4

/****************** part 1: libublk ********************/

/* Device node paths; the char device node is UBLKC_DEV followed by a number */
#define CTRL_DEV			"/dev/ublk-control"
#define UBLKC_DEV			"/dev/ublkc"
#define UBLKB_DEV			"/dev/ublkb"

#define UBLK_CTRL_RING_DEPTH		32

/* Negative value kept in parentheses so it expands as a single operand */
#define ERROR_EVTFD_DEVID		(-2)

#define UBLK_IO_MAX_BYTES		(1 << 20)

/* Array bounds for queues and threads, expressed as shift + count */
#define UBLK_MAX_QUEUES_SHIFT		5
#define UBLK_MAX_QUEUES			(1 << UBLK_MAX_QUEUES_SHIFT)
#define UBLK_MAX_THREADS_SHIFT		5
#define UBLK_MAX_THREADS		(1 << UBLK_MAX_THREADS_SHIFT)

/* Number of ublk_io slots embedded in every queue */
#define UBLK_QUEUE_DEPTH		1024

/* Forward declarations for the types defined further down */
struct ublk_dev;
struct ublk_queue;
struct ublk_thread;
/* Command line context specific to the stripe target */
struct stripe_ctx {
	unsigned int	chunk_size;
};
/* Command line context specific to the fault_inject target */
struct fault_inject_ctx {
	unsigned long	delay_us;
};
- struct dev_ctx {
- char tgt_type[16];
- unsigned long flags;
- unsigned nr_hw_queues;
- unsigned short nthreads;
- unsigned queue_depth;
- int dev_id;
- int nr_files;
- char *files[MAX_BACK_FILES];
- unsigned int logging:1;
- unsigned int all:1;
- unsigned int fg:1;
- unsigned int recovery:1;
- unsigned int auto_zc_fallback:1;
- unsigned int per_io_tasks:1;
- unsigned int no_ublk_fixed_fd:1;
- unsigned int safe_stop:1;
- unsigned int no_auto_part_scan:1;
- __u32 integrity_flags;
- __u8 metadata_size;
- __u8 pi_offset;
- __u8 csum_type;
- __u8 tag_size;
- int _evtfd;
- int _shmid;
- /* built from shmem, only for ublk_dump_dev() */
- struct ublk_dev *shadow_dev;
- /* for 'update_size' command */
- unsigned long long size;
- union {
- struct stripe_ctx stripe;
- struct fault_inject_ctx fault_inject;
- };
- };
- struct ublk_ctrl_cmd_data {
- __u32 cmd_op;
- #define CTRL_CMD_HAS_DATA 1
- #define CTRL_CMD_HAS_BUF 2
- __u32 flags;
- __u64 data[2];
- __u64 addr;
- __u32 len;
- };
/* Bits stored in ublk_io.flags */
#define UBLKS_IO_NEED_FETCH_RQ		(1UL << 0)
#define UBLKS_IO_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKS_IO_FREE			(1UL << 2)
#define UBLKS_IO_NEED_GET_DATA		(1UL << 3)
#define UBLKS_IO_NEED_REG_BUF		(1UL << 4)

/* State of one IO slot; slots live in ublk_queue.ios[] indexed by tag */
struct ublk_io {
	char		*buf_addr;
	void		*integrity_buf;

	unsigned short	flags;		/* UBLKS_IO_* */
	unsigned short	refs;		/* used by target code only */

	int		tag;
	int		result;

	unsigned short	buf_index;
	unsigned short	tgt_ios;	/* set by ublk_queued_tgt_io() */
	void		*private_data;
};
/* Callback table implemented by each target type */
struct ublk_tgt_ops {
	const char *name;

	/* target setup and teardown */
	int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
	void (*deinit_tgt)(struct ublk_dev *);

	/* IO path: queue the IO for @tag, then handle a completion (CQE) */
	int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
	void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
			    const struct io_uring_cqe *);

	/*
	 * Target specific command line handling
	 *
	 * each option requires argument for target command line
	 */
	void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
	void (*usage)(const struct ublk_tgt_ops *ops);

	/* return buffer index for UBLK_F_AUTO_BUF_REG */
	unsigned short (*buf_index)(const struct ublk_thread *t,
				    const struct ublk_queue *, int tag);
};
- struct ublk_tgt {
- unsigned long dev_size;
- unsigned int sq_depth;
- unsigned int cq_depth;
- const struct ublk_tgt_ops *ops;
- struct ublk_params params;
- int nr_backing_files;
- unsigned long backing_file_size[MAX_BACK_FILES];
- char backing_file[MAX_BACK_FILES][PATH_MAX];
- };
- struct ublk_queue {
- int q_id;
- int q_depth;
- struct ublk_dev *dev;
- const struct ublk_tgt_ops *tgt_ops;
- struct ublksrv_io_desc *io_cmd_buf;
- /* borrow three bit of ublk uapi flags, which may never be used */
- #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
- #define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62)
- #define UBLKS_Q_PREPARED (1ULL << 61)
- __u64 flags;
- int ublk_fd; /* cached ublk char device fd */
- __u8 metadata_size;
- struct ublk_io ios[UBLK_QUEUE_DEPTH];
- /* used for prep io commands */
- pthread_spinlock_t lock;
- };
- /* align with `ublk_elem_header` */
- struct ublk_batch_elem {
- __u16 tag;
- __u16 buf_index;
- __s32 result;
- __u64 buf_addr;
- };
/*
 * Bookkeeping for one commit buffer.
 * buf_idx == UBLKS_T_COMMIT_BUF_INV_IDX means "not prepared"
 * (see ublk_batch_commit_prepared()).
 */
struct batch_commit_buf {
	unsigned short	q_id;
	unsigned short	buf_idx;
	void		*elem;
	unsigned short	done;
	unsigned short	count;
};
/* One fetch buffer: a buffer ring plus the memory and position tracked for it */
struct batch_fetch_buf {
	struct io_uring_buf_ring	*br;
	void				*fetch_buf;
	unsigned int			fetch_buf_size;
	unsigned int			fetch_buf_off;
};
- struct ublk_thread {
- /* Thread-local copy of queue-to-thread mapping for this thread */
- unsigned char q_map[UBLK_MAX_QUEUES];
- struct ublk_dev *dev;
- unsigned short idx;
- unsigned short nr_queues;
- #define UBLKS_T_STOPPING (1U << 0)
- #define UBLKS_T_IDLE (1U << 1)
- #define UBLKS_T_BATCH_IO (1U << 31) /* readonly */
- unsigned state;
- unsigned int cmd_inflight;
- unsigned int io_inflight;
- unsigned short nr_bufs;
- /* followings are for BATCH_IO */
- unsigned short commit_buf_start;
- unsigned char commit_buf_elem_size;
- /*
- * We just support single device, so pre-calculate commit/prep flags
- */
- unsigned short cmd_flags;
- unsigned int nr_commit_buf;
- unsigned int commit_buf_size;
- void *commit_buf;
- #define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1)
- struct allocator commit_buf_alloc;
- struct batch_commit_buf *commit;
- /* FETCH_IO_CMDS buffer */
- unsigned short nr_fetch_bufs;
- struct batch_fetch_buf *fetch;
- struct io_uring ring;
- };
- struct ublk_dev {
- struct ublk_tgt tgt;
- struct ublksrv_ctrl_dev_info dev_info;
- struct ublk_queue q[UBLK_MAX_QUEUES];
- unsigned nthreads;
- unsigned per_io_tasks;
- int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */
- int nr_fds;
- int ctrl_fd;
- struct io_uring ring;
- void *private_data;
- };
/*
 * Defined outside this header. ublk_complete_io() calls it on the non-batch
 * path right after marking @io done and returns its result.
 */
int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io);
- static inline int __ublk_use_batch_io(__u64 flags)
- {
- return flags & UBLK_F_BATCH_IO;
- }
- static inline int ublk_queue_batch_io(const struct ublk_queue *q)
- {
- return __ublk_use_batch_io(q->flags);
- }
- static inline int ublk_dev_batch_io(const struct ublk_dev *dev)
- {
- return __ublk_use_batch_io(dev->dev_info.flags);
- }
- /* only work for handle single device in this pthread context */
- static inline int ublk_thread_batch_io(const struct ublk_thread *t)
- {
- return t->state & UBLKS_T_BATCH_IO;
- }
- static inline void ublk_set_integrity_params(const struct dev_ctx *ctx,
- struct ublk_params *params)
- {
- if (!ctx->metadata_size)
- return;
- params->types |= UBLK_PARAM_TYPE_INTEGRITY;
- params->integrity = (struct ublk_param_integrity) {
- .flags = ctx->integrity_flags,
- .interval_exp = params->basic.logical_bs_shift,
- .metadata_size = ctx->metadata_size,
- .pi_offset = ctx->pi_offset,
- .csum_type = ctx->csum_type,
- .tag_size = ctx->tag_size,
- };
- }
- static inline size_t ublk_integrity_len(const struct ublk_queue *q, size_t len)
- {
- /* All targets currently use interval_exp = logical_bs_shift = 9 */
- return (len >> 9) * q->metadata_size;
- }
- static inline size_t
- ublk_integrity_data_len(const struct ublk_queue *q, size_t integrity_len)
- {
- return (integrity_len / q->metadata_size) << 9;
- }
- static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
- {
- return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
- }
- static inline __u64 ublk_user_copy_offset(unsigned q_id, unsigned tag)
- {
- return UBLKSRV_IO_BUF_OFFSET +
- ((__u64)q_id << UBLK_QID_OFF | (__u64)tag << UBLK_TAG_OFF);
- }
- static inline int is_target_io(__u64 user_data)
- {
- return (user_data & (1ULL << 63)) != 0;
- }
- static inline __u64 build_user_data(unsigned tag, unsigned op,
- unsigned tgt_data, unsigned q_id, unsigned is_target_io)
- {
- /* we only have 7 bits to encode q_id */
- _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7, "UBLK_MAX_QUEUES_SHIFT must be <= 7");
- ublk_assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
- return tag | ((__u64)op << 16) | ((__u64)tgt_data << 24) |
- (__u64)q_id << 56 | (__u64)is_target_io << 63;
- }
- static inline unsigned int user_data_to_tag(__u64 user_data)
- {
- return user_data & 0xffff;
- }
- static inline unsigned int user_data_to_op(__u64 user_data)
- {
- return (user_data >> 16) & 0xff;
- }
- static inline unsigned int user_data_to_tgt_data(__u64 user_data)
- {
- return (user_data >> 24) & 0xffff;
- }
- static inline unsigned int user_data_to_q_id(__u64 user_data)
- {
- return (user_data >> 56) & 0x7f;
- }
/* Return the _IOC_NR() part of command opcode @op */
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	const unsigned short nr = _IOC_NR(op);

	return nr;
}
- static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
- {
- return container_of(io, struct ublk_queue, ios[io->tag]);
- }
- static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
- struct io_uring_sqe *sqes[], int nr_sqes)
- {
- struct io_uring *ring = &t->ring;
- unsigned left = io_uring_sq_space_left(ring);
- int i;
- if (left < nr_sqes)
- io_uring_submit(ring);
- for (i = 0; i < nr_sqes; i++) {
- sqes[i] = io_uring_get_sqe(ring);
- if (!sqes[i])
- return i;
- }
- return nr_sqes;
- }
- static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
- {
- if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
- if (fd_index == 0)
- /* Return the raw ublk FD for index 0 */
- return q->ublk_fd;
- /* Adjust index for backing files (index 1 becomes 0, etc.) */
- return fd_index - 1;
- }
- return fd_index;
- }
- static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
- struct ublk_queue *q, int tag, int q_id, __u64 index)
- {
- struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
- int dev_fd = ublk_get_registered_fd(q, 0);
- io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
- sqe->opcode = IORING_OP_URING_CMD;
- if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
- sqe->flags &= ~IOSQE_FIXED_FILE;
- else
- sqe->flags |= IOSQE_FIXED_FILE;
- cmd->tag = tag;
- cmd->addr = index;
- cmd->q_id = q_id;
- }
- static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
- struct ublk_queue *q, int tag, int q_id, __u64 index)
- {
- __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
- sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
- }
- static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
- struct ublk_queue *q, int tag, int q_id, __u64 index)
- {
- __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
- sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
- }
- static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
- {
- return (void *)&sqe->cmd;
- }
- static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
- {
- q->ios[tag].result = res;
- }
- static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
- {
- return q->ios[tag].result;
- }
- static inline void ublk_mark_io_done(struct ublk_io *io, int res)
- {
- io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE);
- io->result = res;
- }
- static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
- {
- return &q->io_cmd_buf[tag];
- }
- static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
- {
- __u32 *addr = (__u32 *)&sqe->off;
- addr[0] = cmd_op;
- addr[1] = 0;
- }
- static inline unsigned short ublk_batch_io_buf_idx(
- const struct ublk_thread *t, const struct ublk_queue *q,
- unsigned tag);
- static inline unsigned short ublk_io_buf_idx(const struct ublk_thread *t,
- const struct ublk_queue *q,
- unsigned tag)
- {
- if (ublk_queue_batch_io(q))
- return ublk_batch_io_buf_idx(t, q, tag);
- return q->ios[tag].buf_index;
- }
- static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
- {
- return &q->ios[tag];
- }
- static inline int ublk_completed_tgt_io(struct ublk_thread *t,
- struct ublk_queue *q, unsigned tag)
- {
- struct ublk_io *io = ublk_get_io(q, tag);
- t->io_inflight--;
- return --io->tgt_ios == 0;
- }
- static inline bool ublk_queue_use_zc(const struct ublk_queue *q)
- {
- return !!(q->flags & UBLK_F_SUPPORT_ZERO_COPY);
- }
- static inline bool ublk_queue_use_auto_zc(const struct ublk_queue *q)
- {
- return !!(q->flags & UBLK_F_AUTO_BUF_REG);
- }
- static inline bool ublk_queue_auto_zc_fallback(const struct ublk_queue *q)
- {
- return !!(q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK);
- }
- static inline bool ublk_queue_use_user_copy(const struct ublk_queue *q)
- {
- return !!(q->flags & UBLK_F_USER_COPY);
- }
/* Returns 1 when queue @q uses zero copy or auto buffer registration, else 0 */
static inline int ublk_queue_no_buf(const struct ublk_queue *q)
{
	if (ublk_queue_use_zc(q))
		return 1;

	return ublk_queue_use_auto_zc(q);
}
- static inline int ublk_batch_commit_prepared(struct batch_commit_buf *cb)
- {
- return cb->buf_idx != UBLKS_T_COMMIT_BUF_INV_IDX;
- }
- static inline unsigned ublk_queue_idx_in_thread(const struct ublk_thread *t,
- const struct ublk_queue *q)
- {
- unsigned char idx;
- idx = t->q_map[q->q_id];
- ublk_assert(idx != 0);
- return idx - 1;
- }
- /*
- * Each IO's buffer index has to be calculated by this helper for
- * UBLKS_T_BATCH_IO
- */
- static inline unsigned short ublk_batch_io_buf_idx(
- const struct ublk_thread *t, const struct ublk_queue *q,
- unsigned tag)
- {
- return ublk_queue_idx_in_thread(t, q) * q->q_depth + tag;
- }
- /* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */
- int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q);
- /* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */
- void ublk_batch_start_fetch(struct ublk_thread *t);
- /* Handle completion of batch I/O commands (prep/commit) */
- void ublk_batch_compl_cmd(struct ublk_thread *t,
- const struct io_uring_cqe *cqe);
- /* Initialize batch I/O state and calculate buffer parameters */
- void ublk_batch_prepare(struct ublk_thread *t);
- /* Allocate and register commit buffers for batch operations */
- int ublk_batch_alloc_buf(struct ublk_thread *t);
- /* Free commit buffers and cleanup batch allocator */
- void ublk_batch_free_buf(struct ublk_thread *t);
- /* Prepare a new commit buffer for batching completed I/O operations */
- void ublk_batch_prep_commit(struct ublk_thread *t);
- /* Submit UBLK_U_IO_COMMIT_IO_CMDS with batched completed I/O operations */
- void ublk_batch_commit_io_cmds(struct ublk_thread *t);
- /* Add a completed I/O operation to the current batch commit buffer */
- void ublk_batch_complete_io(struct ublk_thread *t, struct ublk_queue *q,
- unsigned tag, int res);
- void ublk_batch_setup_map(unsigned char (*q_thread_map)[UBLK_MAX_QUEUES],
- int nthreads, int queues);
- static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q,
- unsigned tag, int res)
- {
- if (ublk_queue_batch_io(q)) {
- ublk_batch_complete_io(t, q, tag, res);
- return 0;
- } else {
- struct ublk_io *io = &q->ios[tag];
- ublk_mark_io_done(io, res);
- return ublk_queue_io_cmd(t, io);
- }
- }
- static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q,
- unsigned tag, int queued)
- {
- if (queued < 0)
- ublk_complete_io(t, q, tag, queued);
- else {
- struct ublk_io *io = ublk_get_io(q, tag);
- t->io_inflight += queued;
- io->tgt_ios = queued;
- io->result = 0;
- }
- }
/* Callback tables of the available targets, defined outside this header */
extern const struct ublk_tgt_ops null_tgt_ops,
				 loop_tgt_ops,
				 stripe_tgt_ops,
				 fault_inject_tgt_ops;

/* Backing file helpers, defined outside this header */
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev, unsigned int nr_direct);
- #endif
|