eventfd.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/kernel.h>
  3. #include <linux/errno.h>
  4. #include <linux/mm.h>
  5. #include <linux/slab.h>
  6. #include <linux/eventfd.h>
  7. #include <linux/eventpoll.h>
  8. #include <linux/io_uring.h>
  9. #include <linux/io_uring_types.h>
  10. #include "io-wq.h"
  11. #include "eventfd.h"
  12. struct io_ev_fd {
  13. struct eventfd_ctx *cq_ev_fd;
  14. unsigned int eventfd_async;
  15. /* protected by ->completion_lock */
  16. unsigned last_cq_tail;
  17. refcount_t refs;
  18. atomic_t ops;
  19. struct rcu_head rcu;
  20. };
  21. enum {
  22. IO_EVENTFD_OP_SIGNAL_BIT,
  23. };
  24. static void io_eventfd_free(struct rcu_head *rcu)
  25. {
  26. struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
  27. eventfd_ctx_put(ev_fd->cq_ev_fd);
  28. kfree(ev_fd);
  29. }
  30. static void io_eventfd_put(struct io_ev_fd *ev_fd)
  31. {
  32. if (refcount_dec_and_test(&ev_fd->refs))
  33. call_rcu(&ev_fd->rcu, io_eventfd_free);
  34. }
  35. static void io_eventfd_do_signal(struct rcu_head *rcu)
  36. {
  37. struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
  38. eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
  39. io_eventfd_put(ev_fd);
  40. }
  41. /*
  42. * Returns true if the caller should put the ev_fd reference, false if not.
  43. */
  44. static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
  45. {
  46. if (eventfd_signal_allowed()) {
  47. eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
  48. return true;
  49. }
  50. if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
  51. call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
  52. return false;
  53. }
  54. return true;
  55. }
  56. /*
  57. * Trigger if eventfd_async isn't set, or if it's set and the caller is
  58. * an async worker.
  59. */
  60. static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
  61. {
  62. return !ev_fd->eventfd_async || io_wq_current_is_worker();
  63. }
  64. void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
  65. {
  66. bool skip = false;
  67. struct io_ev_fd *ev_fd;
  68. struct io_rings *rings;
  69. guard(rcu)();
  70. rings = rcu_dereference(ctx->rings_rcu);
  71. if (!rings)
  72. return;
  73. if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
  74. return;
  75. ev_fd = rcu_dereference(ctx->io_ev_fd);
  76. /*
  77. * Check again if ev_fd exists in case an io_eventfd_unregister call
  78. * completed between the NULL check of ctx->io_ev_fd at the start of
  79. * the function and rcu_read_lock.
  80. */
  81. if (!ev_fd)
  82. return;
  83. if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
  84. return;
  85. if (cqe_event) {
  86. /*
  87. * Eventfd should only get triggered when at least one event
  88. * has been posted. Some applications rely on the eventfd
  89. * notification count only changing IFF a new CQE has been
  90. * added to the CQ ring. There's no dependency on 1:1
  91. * relationship between how many times this function is called
  92. * (and hence the eventfd count) and number of CQEs posted to
  93. * the CQ ring.
  94. */
  95. spin_lock(&ctx->completion_lock);
  96. skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
  97. ev_fd->last_cq_tail = ctx->cached_cq_tail;
  98. spin_unlock(&ctx->completion_lock);
  99. }
  100. if (skip || __io_eventfd_signal(ev_fd))
  101. io_eventfd_put(ev_fd);
  102. }
  103. int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
  104. unsigned int eventfd_async)
  105. {
  106. struct io_ev_fd *ev_fd;
  107. __s32 __user *fds = arg;
  108. int fd;
  109. ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
  110. lockdep_is_held(&ctx->uring_lock));
  111. if (ev_fd)
  112. return -EBUSY;
  113. if (copy_from_user(&fd, fds, sizeof(*fds)))
  114. return -EFAULT;
  115. ev_fd = kmalloc_obj(*ev_fd);
  116. if (!ev_fd)
  117. return -ENOMEM;
  118. ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
  119. if (IS_ERR(ev_fd->cq_ev_fd)) {
  120. int ret = PTR_ERR(ev_fd->cq_ev_fd);
  121. kfree(ev_fd);
  122. return ret;
  123. }
  124. spin_lock(&ctx->completion_lock);
  125. ev_fd->last_cq_tail = ctx->cached_cq_tail;
  126. spin_unlock(&ctx->completion_lock);
  127. ev_fd->eventfd_async = eventfd_async;
  128. ctx->has_evfd = true;
  129. refcount_set(&ev_fd->refs, 1);
  130. atomic_set(&ev_fd->ops, 0);
  131. rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
  132. return 0;
  133. }
  134. int io_eventfd_unregister(struct io_ring_ctx *ctx)
  135. {
  136. struct io_ev_fd *ev_fd;
  137. ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
  138. lockdep_is_held(&ctx->uring_lock));
  139. if (ev_fd) {
  140. ctx->has_evfd = false;
  141. rcu_assign_pointer(ctx->io_ev_fd, NULL);
  142. io_eventfd_put(ev_fd);
  143. return 0;
  144. }
  145. return -ENXIO;
  146. }