| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- // SPDX-License-Identifier: GPL-2.0
- #include <linux/kernel.h>
- #include <linux/errno.h>
- #include <linux/mm.h>
- #include <linux/slab.h>
- #include <linux/eventfd.h>
- #include <linux/eventpoll.h>
- #include <linux/io_uring.h>
- #include <linux/io_uring_types.h>
- #include "io-wq.h"
- #include "eventfd.h"
- struct io_ev_fd {
- struct eventfd_ctx *cq_ev_fd;
- unsigned int eventfd_async;
- /* protected by ->completion_lock */
- unsigned last_cq_tail;
- refcount_t refs;
- atomic_t ops;
- struct rcu_head rcu;
- };
/* Bit numbers used in io_ev_fd->ops. */
enum {
	/* set when io_eventfd_do_signal() has been queued as an RCU callback */
	IO_EVENTFD_OP_SIGNAL_BIT = 0,
};
- static void io_eventfd_free(struct rcu_head *rcu)
- {
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
- eventfd_ctx_put(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- }
- static void io_eventfd_put(struct io_ev_fd *ev_fd)
- {
- if (refcount_dec_and_test(&ev_fd->refs))
- call_rcu(&ev_fd->rcu, io_eventfd_free);
- }
- static void io_eventfd_do_signal(struct rcu_head *rcu)
- {
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
- io_eventfd_put(ev_fd);
- }
- /*
- * Returns true if the caller should put the ev_fd reference, false if not.
- */
- static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
- {
- if (eventfd_signal_allowed()) {
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
- return true;
- }
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
- call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
- return false;
- }
- return true;
- }
- /*
- * Trigger if eventfd_async isn't set, or if it's set and the caller is
- * an async worker.
- */
- static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
- {
- return !ev_fd->eventfd_async || io_wq_current_is_worker();
- }
- void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
- {
- bool skip = false;
- struct io_ev_fd *ev_fd;
- struct io_rings *rings;
- guard(rcu)();
- rings = rcu_dereference(ctx->rings_rcu);
- if (!rings)
- return;
- if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- return;
- ev_fd = rcu_dereference(ctx->io_ev_fd);
- /*
- * Check again if ev_fd exists in case an io_eventfd_unregister call
- * completed between the NULL check of ctx->io_ev_fd at the start of
- * the function and rcu_read_lock.
- */
- if (!ev_fd)
- return;
- if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
- return;
- if (cqe_event) {
- /*
- * Eventfd should only get triggered when at least one event
- * has been posted. Some applications rely on the eventfd
- * notification count only changing IFF a new CQE has been
- * added to the CQ ring. There's no dependency on 1:1
- * relationship between how many times this function is called
- * (and hence the eventfd count) and number of CQEs posted to
- * the CQ ring.
- */
- spin_lock(&ctx->completion_lock);
- skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
- ev_fd->last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- }
- if (skip || __io_eventfd_signal(ev_fd))
- io_eventfd_put(ev_fd);
- }
- int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
- unsigned int eventfd_async)
- {
- struct io_ev_fd *ev_fd;
- __s32 __user *fds = arg;
- int fd;
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd)
- return -EBUSY;
- if (copy_from_user(&fd, fds, sizeof(*fds)))
- return -EFAULT;
- ev_fd = kmalloc_obj(*ev_fd);
- if (!ev_fd)
- return -ENOMEM;
- ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
- if (IS_ERR(ev_fd->cq_ev_fd)) {
- int ret = PTR_ERR(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- return ret;
- }
- spin_lock(&ctx->completion_lock);
- ev_fd->last_cq_tail = ctx->cached_cq_tail;
- spin_unlock(&ctx->completion_lock);
- ev_fd->eventfd_async = eventfd_async;
- ctx->has_evfd = true;
- refcount_set(&ev_fd->refs, 1);
- atomic_set(&ev_fd->ops, 0);
- rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
- return 0;
- }
- int io_eventfd_unregister(struct io_ring_ctx *ctx)
- {
- struct io_ev_fd *ev_fd;
- ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
- lockdep_is_held(&ctx->uring_lock));
- if (ev_fd) {
- ctx->has_evfd = false;
- rcu_assign_pointer(ctx->io_ev_fd, NULL);
- io_eventfd_put(ev_fd);
- return 0;
- }
- return -ENXIO;
- }
|