deferred_close.rs 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. // SPDX-License-Identifier: GPL-2.0
  2. // Copyright (C) 2025 Google LLC.
  3. //! Logic for closing files in a deferred manner.
  4. //!
  5. //! This file could make sense to have in `kernel::fs`, but it was rejected for being too
  6. //! Binder-specific.
  7. use core::mem::MaybeUninit;
  8. use kernel::{
  9. alloc::{AllocError, Flags},
  10. bindings,
  11. prelude::*,
  12. };
  13. /// Helper used for closing file descriptors in a way that is safe even if the file is currently
  14. /// held using `fdget`.
  15. ///
  16. /// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to
  17. /// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`.
  18. pub(crate) struct DeferredFdCloser {
  19. inner: KBox<DeferredFdCloserInner>,
  20. }
  21. /// SAFETY: This just holds an allocation with no real content, so there's no safety issue with
  22. /// moving it across threads.
  23. unsafe impl Send for DeferredFdCloser {}
  24. /// SAFETY: This just holds an allocation with no real content, so there's no safety issue with
  25. /// moving it across threads.
  26. unsafe impl Sync for DeferredFdCloser {}
  27. /// # Invariants
  28. ///
  29. /// If the `file` pointer is non-null, then it points at a `struct file` and owns a refcount to
  30. /// that file.
  31. #[repr(C)]
  32. struct DeferredFdCloserInner {
  33. twork: MaybeUninit<bindings::callback_head>,
  34. file: *mut bindings::file,
  35. }
  36. impl DeferredFdCloser {
  37. /// Create a new [`DeferredFdCloser`].
  38. pub(crate) fn new(flags: Flags) -> Result<Self, AllocError> {
  39. Ok(Self {
  40. // INVARIANT: The `file` pointer is null, so the type invariant does not apply.
  41. inner: KBox::new(
  42. DeferredFdCloserInner {
  43. twork: MaybeUninit::uninit(),
  44. file: core::ptr::null_mut(),
  45. },
  46. flags,
  47. )?,
  48. })
  49. }
  50. /// Schedule a task work that closes the file descriptor when this task returns to userspace.
  51. ///
  52. /// Fails if this is called from a context where we cannot run work when returning to
  53. /// userspace. (E.g., from a kthread.)
  54. pub(crate) fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> {
  55. use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME;
  56. // In this method, we schedule the task work before closing the file. This is because
  57. // scheduling a task work is fallible, and we need to know whether it will fail before we
  58. // attempt to close the file.
  59. // Task works are not available on kthreads.
  60. let current = kernel::current!();
  61. // Check if this is a kthread.
  62. // SAFETY: Reading `flags` from a task is always okay.
  63. if unsafe { ((*current.as_ptr()).flags & bindings::PF_KTHREAD) != 0 } {
  64. return Err(DeferredFdCloseError::TaskWorkUnavailable);
  65. }
  66. // Transfer ownership of the box's allocation to a raw pointer. This disables the
  67. // destructor, so we must manually convert it back to a KBox to drop it.
  68. //
  69. // Until we convert it back to a `KBox`, there are no aliasing requirements on this
  70. // pointer.
  71. let inner = KBox::into_raw(self.inner);
  72. // The `callback_head` field is first in the struct, so this cast correctly gives us a
  73. // pointer to the field.
  74. let callback_head = inner.cast::<bindings::callback_head>();
  75. // SAFETY: This pointer offset operation does not go out-of-bounds.
  76. let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) };
  77. let current = current.as_ptr();
  78. // SAFETY: This function currently has exclusive access to the `DeferredFdCloserInner`, so
  79. // it is okay for us to perform unsynchronized writes to its `callback_head` field.
  80. unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) };
  81. // SAFETY: This inserts the `DeferredFdCloserInner` into the task workqueue for the current
  82. // task. If this operation is successful, then this transfers exclusive ownership of the
  83. // `callback_head` field to the C side until it calls `do_close_fd`, and we don't touch or
  84. // invalidate the field during that time.
  85. //
  86. // When the C side calls `do_close_fd`, the safety requirements of that method are
  87. // satisfied because when a task work is executed, the callback is given ownership of the
  88. // pointer.
  89. //
  90. // The file pointer is currently null. If it is changed to be non-null before `do_close_fd`
  91. // is called, then that change happens due to the write at the end of this function, and
  92. // that write has a safety comment that explains why the refcount can be dropped when
  93. // `do_close_fd` runs.
  94. let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) };
  95. if res != 0 {
  96. // SAFETY: Scheduling the task work failed, so we still have ownership of the box, so
  97. // we may destroy it.
  98. unsafe { drop(KBox::from_raw(inner)) };
  99. return Err(DeferredFdCloseError::TaskWorkUnavailable);
  100. }
  101. // This removes the fd from the fd table in `current`. The file is not fully closed until
  102. // `filp_close` is called. We are given ownership of one refcount to the file.
  103. //
  104. // SAFETY: This is safe no matter what `fd` is. If the `fd` is valid (that is, if the
  105. // pointer is non-null), then we call `filp_close` on the returned pointer as required by
  106. // `file_close_fd`.
  107. let file = unsafe { bindings::file_close_fd(fd) };
  108. if file.is_null() {
  109. // We don't clean up the task work since that might be expensive if the task work queue
  110. // is long. Just let it execute and let it clean up for itself.
  111. return Err(DeferredFdCloseError::BadFd);
  112. }
  113. // Acquire a second refcount to the file.
  114. //
  115. // SAFETY: The `file` pointer points at a file with a non-zero refcount.
  116. unsafe { bindings::get_file(file) };
  117. // This method closes the fd, consuming one of our two refcounts. There could be active
  118. // light refcounts created from that fd, so we must ensure that the file has a positive
  119. // refcount for the duration of those active light refcounts. We do that by holding on to
  120. // the second refcount until the current task returns to userspace.
  121. //
  122. // SAFETY: The `file` pointer is valid. Passing `current->files` as the file table to close
  123. // it in is correct, since we just got the `fd` from `file_close_fd` which also uses
  124. // `current->files`.
  125. //
  126. // Note: fl_owner_t is currently a void pointer.
  127. unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) };
  128. // We update the file pointer that the task work is supposed to fput. This transfers
  129. // ownership of our last refcount.
  130. //
  131. // INVARIANT: This changes the `file` field of a `DeferredFdCloserInner` from null to
  132. // non-null. This doesn't break the type invariant for `DeferredFdCloserInner` because we
  133. // still own a refcount to the file, so we can pass ownership of that refcount to the
  134. // `DeferredFdCloserInner`.
  135. //
  136. // When `do_close_fd` runs, it must be safe for it to `fput` the refcount. However, this is
  137. // the case because all light refcounts that are associated with the fd we closed
  138. // previously must be dropped when `do_close_fd`, since light refcounts must be dropped
  139. // before returning to userspace.
  140. //
  141. // SAFETY: Task works are executed on the current thread right before we return to
  142. // userspace, so this write is guaranteed to happen before `do_close_fd` is called, which
  143. // means that a race is not possible here.
  144. unsafe { *file_field = file };
  145. Ok(())
  146. }
  147. /// # Safety
  148. ///
  149. /// The provided pointer must point at the `twork` field of a `DeferredFdCloserInner` stored in
  150. /// a `KBox`, and the caller must pass exclusive ownership of that `KBox`. Furthermore, if the
  151. /// file pointer is non-null, then it must be okay to release the refcount by calling `fput`.
  152. unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) {
  153. // SAFETY: The caller just passed us ownership of this box.
  154. let inner = unsafe { KBox::from_raw(inner.cast::<DeferredFdCloserInner>()) };
  155. if !inner.file.is_null() {
  156. // SAFETY: By the type invariants, we own a refcount to this file, and the caller
  157. // guarantees that dropping the refcount now is okay.
  158. unsafe { bindings::fput(inner.file) };
  159. }
  160. // The allocation is freed when `inner` goes out of scope.
  161. }
  162. }
  163. /// Represents a failure to close an fd in a deferred manner.
  164. #[derive(Copy, Clone, Debug, Eq, PartialEq)]
  165. pub(crate) enum DeferredFdCloseError {
  166. /// Closing the fd failed because we were unable to schedule a task work.
  167. TaskWorkUnavailable,
  168. /// Closing the fd failed because the fd does not exist.
  169. BadFd,
  170. }
  171. impl From<DeferredFdCloseError> for Error {
  172. fn from(err: DeferredFdCloseError) -> Error {
  173. match err {
  174. DeferredFdCloseError::TaskWorkUnavailable => ESRCH,
  175. DeferredFdCloseError::BadFd => EBADF,
  176. }
  177. }
  178. }