file_backed.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include "kublk.h"
  3. static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
  4. {
  5. unsigned ublk_op = ublksrv_get_op(iod);
  6. if (ublk_op == UBLK_IO_OP_READ)
  7. return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
  8. else if (ublk_op == UBLK_IO_OP_WRITE)
  9. return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
  10. ublk_assert(0);
  11. }
  12. static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
  13. const struct ublksrv_io_desc *iod, int tag)
  14. {
  15. unsigned ublk_op = ublksrv_get_op(iod);
  16. struct io_uring_sqe *sqe[1];
  17. ublk_io_alloc_sqes(t, sqe, 1);
  18. io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC);
  19. io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
  20. /* bit63 marks us as tgt io */
  21. sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
  22. return 1;
  23. }
  24. static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
  25. const struct ublksrv_io_desc *iod, int tag)
  26. {
  27. unsigned ublk_op = ublksrv_get_op(iod);
  28. unsigned zc = ublk_queue_use_zc(q);
  29. unsigned auto_zc = ublk_queue_use_auto_zc(q);
  30. enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
  31. struct ublk_io *io = ublk_get_io(q, tag);
  32. __u64 offset = iod->start_sector << 9;
  33. __u32 len = iod->nr_sectors << 9;
  34. struct io_uring_sqe *sqe[3];
  35. void *addr = io->buf_addr;
  36. unsigned short buf_index = ublk_io_buf_idx(t, q, tag);
  37. if (iod->op_flags & UBLK_IO_F_INTEGRITY) {
  38. ublk_io_alloc_sqes(t, sqe, 1);
  39. /* Use second backing file for integrity data */
  40. io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 2),
  41. io->integrity_buf,
  42. ublk_integrity_len(q, len),
  43. ublk_integrity_len(q, offset));
  44. sqe[0]->flags = IOSQE_FIXED_FILE;
  45. /* tgt_data = 1 indicates integrity I/O */
  46. sqe[0]->user_data = build_user_data(tag, ublk_op, 1, q->q_id, 1);
  47. }
  48. if (!zc || auto_zc) {
  49. ublk_io_alloc_sqes(t, sqe, 1);
  50. if (!sqe[0])
  51. return -ENOMEM;
  52. io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
  53. addr,
  54. len,
  55. offset);
  56. if (auto_zc)
  57. sqe[0]->buf_index = buf_index;
  58. io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
  59. /* bit63 marks us as tgt io */
  60. sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
  61. return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 1;
  62. }
  63. ublk_io_alloc_sqes(t, sqe, 3);
  64. io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, buf_index);
  65. sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
  66. sqe[0]->user_data = build_user_data(tag,
  67. ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
  68. io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
  69. len,
  70. offset);
  71. sqe[1]->buf_index = buf_index;
  72. sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
  73. sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
  74. io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, buf_index);
  75. sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
  76. return !!(iod->op_flags & UBLK_IO_F_INTEGRITY) + 2;
  77. }
  78. static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag)
  79. {
  80. const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
  81. unsigned ublk_op = ublksrv_get_op(iod);
  82. int ret;
  83. switch (ublk_op) {
  84. case UBLK_IO_OP_FLUSH:
  85. ret = loop_queue_flush_io(t, q, iod, tag);
  86. break;
  87. case UBLK_IO_OP_WRITE_ZEROES:
  88. case UBLK_IO_OP_DISCARD:
  89. ret = -ENOTSUP;
  90. break;
  91. case UBLK_IO_OP_READ:
  92. case UBLK_IO_OP_WRITE:
  93. ret = loop_queue_tgt_rw_io(t, q, iod, tag);
  94. break;
  95. default:
  96. ret = -EINVAL;
  97. break;
  98. }
  99. ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
  100. iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
  101. return ret;
  102. }
  103. static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q,
  104. int tag)
  105. {
  106. int queued = loop_queue_tgt_io(t, q, tag);
  107. ublk_queued_tgt_io(t, q, tag, queued);
  108. return 0;
  109. }
  110. static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q,
  111. const struct io_uring_cqe *cqe)
  112. {
  113. unsigned tag = user_data_to_tag(cqe->user_data);
  114. unsigned op = user_data_to_op(cqe->user_data);
  115. struct ublk_io *io = ublk_get_io(q, tag);
  116. if (cqe->res < 0) {
  117. io->result = cqe->res;
  118. ublk_err("%s: io failed op %x user_data %lx\n",
  119. __func__, op, cqe->user_data);
  120. } else if (op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
  121. __s32 data_len = user_data_to_tgt_data(cqe->user_data)
  122. ? ublk_integrity_data_len(q, cqe->res)
  123. : cqe->res;
  124. if (!io->result || data_len < io->result)
  125. io->result = data_len;
  126. }
  127. /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
  128. if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
  129. io->tgt_ios += 1;
  130. if (ublk_completed_tgt_io(t, q, tag))
  131. ublk_complete_io(t, q, tag, io->result);
  132. }
  133. static int ublk_loop_memset_file(int fd, __u8 byte, size_t len)
  134. {
  135. off_t offset = 0;
  136. __u8 buf[4096];
  137. memset(buf, byte, sizeof(buf));
  138. while (len) {
  139. int ret = pwrite(fd, buf, min(len, sizeof(buf)), offset);
  140. if (ret < 0)
  141. return -errno;
  142. if (!ret)
  143. return -EIO;
  144. len -= ret;
  145. offset += ret;
  146. }
  147. return 0;
  148. }
  149. static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
  150. {
  151. unsigned long long bytes;
  152. unsigned long blocks;
  153. int ret;
  154. struct ublk_params p = {
  155. .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
  156. .basic = {
  157. .attrs = UBLK_ATTR_VOLATILE_CACHE,
  158. .logical_bs_shift = 9,
  159. .physical_bs_shift = 12,
  160. .io_opt_shift = 12,
  161. .io_min_shift = 9,
  162. .max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
  163. },
  164. .dma = {
  165. .alignment = 511,
  166. },
  167. };
  168. ublk_set_integrity_params(ctx, &p);
  169. if (ctx->auto_zc_fallback) {
  170. ublk_err("%s: not support auto_zc_fallback\n", __func__);
  171. return -EINVAL;
  172. }
  173. /* Use O_DIRECT only for data file */
  174. ret = backing_file_tgt_init(dev, 1);
  175. if (ret)
  176. return ret;
  177. /* Expect a second file for integrity data */
  178. if (dev->tgt.nr_backing_files != 1 + !!ctx->metadata_size)
  179. return -EINVAL;
  180. blocks = dev->tgt.backing_file_size[0] >> p.basic.logical_bs_shift;
  181. if (ctx->metadata_size) {
  182. unsigned long metadata_blocks =
  183. dev->tgt.backing_file_size[1] / ctx->metadata_size;
  184. unsigned long integrity_len;
  185. /* Ensure both data and integrity data fit in backing files */
  186. blocks = min(blocks, metadata_blocks);
  187. integrity_len = blocks * ctx->metadata_size;
  188. /*
  189. * Initialize PI app tag and ref tag to 0xFF
  190. * to disable bio-integrity-auto checks
  191. */
  192. ret = ublk_loop_memset_file(dev->fds[2], 0xFF, integrity_len);
  193. if (ret)
  194. return ret;
  195. }
  196. bytes = blocks << p.basic.logical_bs_shift;
  197. dev->tgt.dev_size = bytes;
  198. p.basic.dev_sectors = bytes >> 9;
  199. dev->tgt.params = p;
  200. return 0;
  201. }
  202. const struct ublk_tgt_ops loop_tgt_ops = {
  203. .name = "loop",
  204. .init_tgt = ublk_loop_tgt_init,
  205. .deinit_tgt = backing_file_tgt_deinit,
  206. .queue_io = ublk_loop_queue_io,
  207. .tgt_io_done = ublk_loop_io_done,
  208. };