page_range.rs 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. // SPDX-License-Identifier: GPL-2.0
  2. // Copyright (C) 2025 Google LLC.
  3. //! This module has utilities for managing a page range where unused pages may be reclaimed by a
  4. //! vma shrinker.
  5. // To avoid deadlocks, locks are taken in the order:
  6. //
  7. // 1. mmap lock
  8. // 2. spinlock
  9. // 3. lru spinlock
  10. //
  11. // The shrinker will use trylock methods because it locks them in a different order.
  12. use crate::AssertSync;
  13. use core::{
  14. marker::PhantomPinned,
  15. mem::{size_of, size_of_val, MaybeUninit},
  16. ptr,
  17. };
  18. use kernel::{
  19. bindings,
  20. error::Result,
  21. ffi::{c_ulong, c_void},
  22. mm::{virt, Mm, MmWithUser},
  23. new_mutex, new_spinlock,
  24. page::{Page, PAGE_SHIFT, PAGE_SIZE},
  25. prelude::*,
  26. str::CStr,
  27. sync::{aref::ARef, Mutex, SpinLock},
  28. task::Pid,
  29. transmute::FromBytes,
  30. types::Opaque,
  31. uaccess::UserSliceReader,
  32. };
/// Represents a shrinker that can be registered with the kernel.
///
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
///
/// Both fields are created uninitialized by `Shrinker::new` and only receive valid contents in
/// `Shrinker::register`.
#[repr(C)]
pub(crate) struct Shrinker {
    /// Pointer to the kernel shrinker object. It is set to null at the start of `register` and
    /// to the allocated shrinker once registration has succeeded.
    inner: Opaque<*mut bindings::shrinker>,
    /// The lru list that available pages are queued on. `register` also installs a pointer to
    /// this field as the kernel shrinker's `private_data`.
    list_lru: Opaque<bindings::list_lru>,
}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Send for Shrinker {}
// SAFETY: The shrinker and list_lru are thread safe.
unsafe impl Sync for Shrinker {}
  45. impl Shrinker {
  46. /// Create a new shrinker.
  47. ///
  48. /// # Safety
  49. ///
  50. /// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
  51. /// been called exactly once, and it must not have returned an error.
  52. pub(crate) const unsafe fn new() -> Self {
  53. Self {
  54. inner: Opaque::uninit(),
  55. list_lru: Opaque::uninit(),
  56. }
  57. }
  58. /// Register this shrinker with the kernel.
  59. pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
  60. // SAFETY: These fields are not yet used, so it's okay to zero them.
  61. unsafe {
  62. self.inner.get().write(ptr::null_mut());
  63. self.list_lru.get().write_bytes(0, 1);
  64. }
  65. // SAFETY: The field is not yet used, so we can initialize it.
  66. let ret = unsafe { bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut()) };
  67. if ret != 0 {
  68. return Err(Error::from_errno(ret));
  69. }
  70. // SAFETY: The `name` points at a valid c string.
  71. let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
  72. if shrinker.is_null() {
  73. // SAFETY: We initialized it, so its okay to destroy it.
  74. unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
  75. return Err(Error::from_errno(ret));
  76. }
  77. // SAFETY: We're about to register the shrinker, and these are the fields we need to
  78. // initialize. (All other fields are already zeroed.)
  79. unsafe {
  80. (&raw mut (*shrinker).count_objects).write(Some(rust_shrink_count));
  81. (&raw mut (*shrinker).scan_objects).write(Some(rust_shrink_scan));
  82. (&raw mut (*shrinker).private_data).write(self.list_lru.get().cast());
  83. }
  84. // SAFETY: The new shrinker has been fully initialized, so we can register it.
  85. unsafe { bindings::shrinker_register(shrinker) };
  86. // SAFETY: This initializes the pointer to the shrinker so that we can use it.
  87. unsafe { self.inner.get().write(shrinker) };
  88. Ok(())
  89. }
  90. }
/// A container that manages a page range in a vma.
///
/// The pages can be thought of as an array of booleans of whether the pages are usable. The
/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
/// immediately. Instead, it is made available to the memory shrinker to free it if the device is
/// under memory pressure.
///
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
/// way to know whether an index ends up with true or false if a call to `use_range` races with
/// another call to `stop_using_range` on a given index.
///
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
/// `use_range` on the same index, then that's fine and neither call will return until the page is
/// allocated and mapped.
///
/// The methods that read or write to a range require that the page is marked as in use. So it is
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
/// write to the page.
#[pin_data(PinnedDrop)]
pub(crate) struct ShrinkablePageRange {
    /// Shrinker object registered with the kernel.
    shrinker: &'static Shrinker,
    /// Pid using this page range. Only used as debugging information.
    pid: Pid,
    /// The mm for the relevant process.
    mm: ARef<Mm>,
    /// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
    ///
    /// The destructor also takes this lock, which `stable_trylock_mm` relies on to keep its
    /// returned guard valid.
    #[pin]
    mm_lock: Mutex<()>,
    /// Spinlock protecting changes to pages.
    #[pin]
    lock: SpinLock<Inner>,
    /// Must not move, since page info has pointers back.
    #[pin]
    _pin: PhantomPinned,
}
// We do not define any ops. For now, used only to check identity of vmas.
//
// `register_with_vma` stores the address of this static in the vma's `vm_ops`, and `check_vma`
// compares against that address; the all-zero contents themselves are never inspected here.
static BINDER_VM_OPS: AssertSync<bindings::vm_operations_struct> = AssertSync(pin_init::zeroed());
  130. // To ensure that we do not accidentally install pages into or zap pages from the wrong vma, we
  131. // check its vm_ops and private data before using it.
  132. fn check_vma(vma: &virt::VmaRef, owner: *const ShrinkablePageRange) -> Option<&virt::VmaMixedMap> {
  133. // SAFETY: Just reading the vm_ops pointer of any active vma is safe.
  134. let vm_ops = unsafe { (*vma.as_ptr()).vm_ops };
  135. if !ptr::eq(vm_ops, &BINDER_VM_OPS.0) {
  136. return None;
  137. }
  138. // SAFETY: Reading the vm_private_data pointer of a binder-owned vma is safe.
  139. let vm_private_data = unsafe { (*vma.as_ptr()).vm_private_data };
  140. // The ShrinkablePageRange is only dropped when the Process is dropped, which only happens once
  141. // the file's ->release handler is invoked, which means the ShrinkablePageRange outlives any
  142. // VMA associated with it, so there can't be any false positives due to pointer reuse here.
  143. if !ptr::eq(vm_private_data, owner.cast()) {
  144. return None;
  145. }
  146. vma.as_mixedmap_vma()
  147. }
/// The state of a `ShrinkablePageRange` that is protected by its spinlock.
///
/// All fields start out as null/zero and are filled in by `register_with_vma`.
struct Inner {
    /// Array of pages.
    ///
    /// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
    /// ownership. To deal with that, we manage it using raw pointers.
    pages: *mut PageInfo,
    /// Length of the `pages` array.
    size: usize,
    /// The address of the vma to insert the pages into.
    vma_addr: usize,
}
// SAFETY: proper locking is in place for `Inner`
unsafe impl Send for Inner {}
/// Guard for the `mm_lock` mutex of a `ShrinkablePageRange` with its lifetime extended to
/// `'static`, as returned by `ShrinkablePageRange::stable_trylock_mm`.
type StableMmGuard =
    kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;
/// An array element that describes the current state of a page.
///
/// There are three states:
///
/// * Free. The page is None. The `lru` element is not queued.
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
/// * Used. The page is Some. The `lru` element is not queued.
///
/// When an element is available, the shrinker is able to free the page.
#[repr(C)]
struct PageInfo {
    /// List node used to queue this element on the shrinker's lru list.
    ///
    /// This must remain the first field: `rust_shrink_free_page` casts the `list_head` pointer
    /// it receives back into a `PageInfo` pointer.
    lru: bindings::list_head,
    /// The backing page, if one is currently allocated for this index.
    page: Option<Page>,
    /// Back-pointer to the owning `ShrinkablePageRange`; written once in `register_with_vma`.
    range: *const ShrinkablePageRange,
}
  178. impl PageInfo {
  179. /// # Safety
  180. ///
  181. /// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
  182. unsafe fn set_page(me: *mut PageInfo, page: Page) {
  183. // SAFETY: This pointer offset is in bounds.
  184. let ptr = unsafe { &raw mut (*me).page };
  185. // SAFETY: The pointer is valid for writing, so also valid for reading.
  186. if unsafe { (*ptr).is_some() } {
  187. pr_err!("set_page called when there is already a page");
  188. // SAFETY: We will initialize the page again below.
  189. unsafe { ptr::drop_in_place(ptr) };
  190. }
  191. // SAFETY: The pointer is valid for writing.
  192. unsafe { ptr::write(ptr, Some(page)) };
  193. }
  194. /// # Safety
  195. ///
  196. /// The caller ensures that reading from `me.page` is ok for the duration of 'a.
  197. unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
  198. // SAFETY: This pointer offset is in bounds.
  199. let ptr = unsafe { &raw const (*me).page };
  200. // SAFETY: The pointer is valid for reading.
  201. unsafe { (*ptr).as_ref() }
  202. }
  203. /// # Safety
  204. ///
  205. /// The caller ensures that writing to `me.page` is ok for the duration of 'a.
  206. unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
  207. // SAFETY: This pointer offset is in bounds.
  208. let ptr = unsafe { &raw mut (*me).page };
  209. // SAFETY: The pointer is valid for reading.
  210. unsafe { (*ptr).take() }
  211. }
  212. /// Add this page to the lru list, if not already in the list.
  213. ///
  214. /// # Safety
  215. ///
  216. /// The pointer must be valid, and it must be the right shrinker and nid.
  217. unsafe fn list_lru_add(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
  218. // SAFETY: This pointer offset is in bounds.
  219. let lru_ptr = unsafe { &raw mut (*me).lru };
  220. // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
  221. unsafe { bindings::list_lru_add(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
  222. }
  223. /// Remove this page from the lru list, if it is in the list.
  224. ///
  225. /// # Safety
  226. ///
  227. /// The pointer must be valid, and it must be the right shrinker and nid.
  228. unsafe fn list_lru_del(me: *mut PageInfo, nid: i32, shrinker: &'static Shrinker) {
  229. // SAFETY: This pointer offset is in bounds.
  230. let lru_ptr = unsafe { &raw mut (*me).lru };
  231. // SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
  232. unsafe { bindings::list_lru_del(shrinker.list_lru.get(), lru_ptr, nid, ptr::null_mut()) };
  233. }
  234. }
  235. impl ShrinkablePageRange {
    /// Create a new `ShrinkablePageRange` using the given shrinker.
    ///
    /// The returned initializer records the pid and the mm of the current task, and starts out
    /// with an empty page array (null pointer, size zero, vma address zero) until
    /// `register_with_vma` is called.
    ///
    /// # Errors
    ///
    /// Initialization fails with `ESRCH` if the current task has no mm.
    pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
        try_pin_init!(Self {
            shrinker,
            pid: kernel::current!().pid(),
            mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
            mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
            lock <- new_spinlock!(Inner {
                pages: ptr::null_mut(),
                size: 0,
                vma_addr: 0,
            }, "ShrinkablePageRange"),
            _pin: PhantomPinned,
        })
    }
  251. pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
  252. // SAFETY: This extends the duration of the reference. Since this call happens before
  253. // `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
  254. // until the returned guard is dropped. This ensures that the guard is valid until dropped.
  255. let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };
  256. mm_lock.try_lock()
  257. }
  258. /// Register a vma with this page range. Returns the size of the region.
  259. pub(crate) fn register_with_vma(&self, vma: &virt::VmaNew) -> Result<usize> {
  260. let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
  261. let num_pages = num_bytes >> PAGE_SHIFT;
  262. if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
  263. pr_debug!("Failed to register with vma: invalid vma->vm_mm");
  264. return Err(EINVAL);
  265. }
  266. if num_pages == 0 {
  267. pr_debug!("Failed to register with vma: size zero");
  268. return Err(EINVAL);
  269. }
  270. let mut pages = KVVec::<PageInfo>::with_capacity(num_pages, GFP_KERNEL)?;
  271. // SAFETY: This just initializes the pages array.
  272. unsafe {
  273. let self_ptr = self as *const ShrinkablePageRange;
  274. for i in 0..num_pages {
  275. let info = pages.as_mut_ptr().add(i);
  276. (&raw mut (*info).range).write(self_ptr);
  277. (&raw mut (*info).page).write(None);
  278. let lru = &raw mut (*info).lru;
  279. (&raw mut (*lru).next).write(lru);
  280. (&raw mut (*lru).prev).write(lru);
  281. }
  282. }
  283. let mut inner = self.lock.lock();
  284. if inner.size > 0 {
  285. pr_debug!("Failed to register with vma: already registered");
  286. drop(inner);
  287. return Err(EBUSY);
  288. }
  289. inner.pages = pages.into_raw_parts().0;
  290. inner.size = num_pages;
  291. inner.vma_addr = vma.start();
  292. // This pointer is only used for comparison - it's not dereferenced.
  293. //
  294. // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
  295. // `vm_private_data`.
  296. unsafe {
  297. (*vma.as_ptr()).vm_private_data = ptr::from_ref(self).cast_mut().cast::<c_void>()
  298. };
  299. // SAFETY: We own the vma, and we don't use any methods on VmaNew that rely on
  300. // `vm_ops`.
  301. unsafe { (*vma.as_ptr()).vm_ops = &BINDER_VM_OPS.0 };
  302. Ok(num_pages)
  303. }
  304. /// Make sure that the given pages are allocated and mapped.
  305. ///
  306. /// Must not be called from an atomic context.
  307. pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
  308. if start >= end {
  309. return Ok(());
  310. }
  311. let mut inner = self.lock.lock();
  312. assert!(end <= inner.size);
  313. for i in start..end {
  314. // SAFETY: This pointer offset is in bounds.
  315. let page_info = unsafe { inner.pages.add(i) };
  316. // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
  317. if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
  318. // Since we're going to use the page, we should remove it from the lru list so that
  319. // the shrinker will not free it.
  320. //
  321. // SAFETY: The pointer is valid, and this is the right shrinker.
  322. //
  323. // The shrinker can't free the page between the check and this call to
  324. // `list_lru_del` because we hold the lock.
  325. unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
  326. } else {
  327. // We have to allocate a new page. Use the slow path.
  328. drop(inner);
  329. // SAFETY: `i < end <= inner.size` so `i` is in bounds.
  330. match unsafe { self.use_page_slow(i) } {
  331. Ok(()) => {}
  332. Err(err) => {
  333. pr_warn!("Error in use_page_slow: {:?}", err);
  334. return Err(err);
  335. }
  336. }
  337. inner = self.lock.lock();
  338. }
  339. }
  340. Ok(())
  341. }
    /// Mark the given page as in use, slow path.
    ///
    /// Allocates a fresh zeroed page up front, then takes `mm_lock` followed by the spinlock. If
    /// the page at index `i` turns out to exist already, it is only removed from the lru list.
    /// Otherwise the new page is inserted into the vma (with the spinlock released) and then
    /// stored in the page array.
    ///
    /// Must not be called from an atomic context.
    ///
    /// # Errors
    ///
    /// * Any error from allocating the page or from `vm_insert_page`.
    /// * `ESRCH` if the mm can no longer be acquired, or if no vma passing `check_vma` is found
    ///   at the registered address.
    ///
    /// # Safety
    ///
    /// Assumes that `i` is in bounds.
    #[cold]
    unsafe fn use_page_slow(&self, i: usize) -> Result<()> {
        // Allocate before taking any locks.
        let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;
        let mm_mutex = self.mm_lock.lock();
        let inner = self.lock.lock();
        // SAFETY: This pointer offset is in bounds.
        let page_info = unsafe { inner.pages.add(i) };
        // SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
        if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
            // The page was already there, or someone else added the page while we didn't hold the
            // spinlock.
            //
            // SAFETY: The pointer is valid, and this is the right shrinker.
            //
            // The shrinker can't free the page between the check and this call to
            // `list_lru_del` because we hold the lock.
            unsafe { PageInfo::list_lru_del(page_info, page.nid(), self.shrinker) };
            return Ok(());
        }
        let vma_addr = inner.vma_addr;
        // Release the spinlock while we insert the page into the vma.
        drop(inner);
        // No overflow since we stay in bounds of the vma.
        let user_page_addr = vma_addr + (i << PAGE_SHIFT);
        // We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
        // a remote process. If the call to `mmput` races with the process shutting down, then the
        // caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
        // happen until it returns to userspace. However, the caller might instead go to sleep and
        // wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
        // middle of a shutdown process that won't complete until the `mm` is dropped. This can
        // amount to a deadlock.
        //
        // Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
        // workqueue.
        let mm = MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?);
        {
            // Declared up front so that whichever lock guard is taken below outlives the `vma`
            // reference derived from it.
            let vma_read;
            let mmap_read;
            // Prefer the per-vma lock; fall back to the mmap read lock if that is unavailable.
            let vma = if let Some(ret) = mm.lock_vma_under_rcu(vma_addr) {
                vma_read = ret;
                check_vma(&vma_read, self)
            } else {
                mmap_read = mm.mmap_read_lock();
                mmap_read
                    .vma_lookup(vma_addr)
                    .and_then(|vma| check_vma(vma, self))
            };
            match vma {
                Some(vma) => vma.vm_insert_page(user_page_addr, &new_page)?,
                None => return Err(ESRCH),
            }
        }
        let inner = self.lock.lock();
        // SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
        // can be written to since we hold the lock.
        //
        // We released and reacquired the spinlock since we checked that the page is null, but we
        // always hold the mm_lock mutex when setting the page to a non-null value, so it's not
        // possible for someone else to have changed it since our check.
        unsafe { PageInfo::set_page(page_info, new_page) };
        drop(inner);
        drop(mm_mutex);
        Ok(())
    }
  413. /// If the given page is in use, then mark it as available so that the shrinker can free it.
  414. ///
  415. /// May be called from an atomic context.
  416. pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
  417. if start >= end {
  418. return;
  419. }
  420. let inner = self.lock.lock();
  421. assert!(end <= inner.size);
  422. for i in (start..end).rev() {
  423. // SAFETY: The pointer is in bounds.
  424. let page_info = unsafe { inner.pages.add(i) };
  425. // SAFETY: Okay for reading since we have the lock.
  426. if let Some(page) = unsafe { PageInfo::get_page(page_info) } {
  427. // SAFETY: The pointer is valid, and it's the right shrinker.
  428. unsafe { PageInfo::list_lru_add(page_info, page.nid(), self.shrinker) };
  429. }
  430. }
  431. }
  432. /// Helper for reading or writing to a range of bytes that may overlap with several pages.
  433. ///
  434. /// # Safety
  435. ///
  436. /// All pages touched by this operation must be in use for the duration of this call.
  437. unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
  438. where
  439. T: FnMut(&Page, usize, usize) -> Result,
  440. {
  441. if size == 0 {
  442. return Ok(());
  443. }
  444. let (pages, num_pages) = {
  445. let inner = self.lock.lock();
  446. (inner.pages, inner.size)
  447. };
  448. let num_bytes = num_pages << PAGE_SHIFT;
  449. // Check that the request is within the buffer.
  450. if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
  451. return Err(EFAULT);
  452. }
  453. let mut page_index = offset >> PAGE_SHIFT;
  454. offset &= PAGE_SIZE - 1;
  455. while size > 0 {
  456. let available = usize::min(size, PAGE_SIZE - offset);
  457. // SAFETY: The pointer is in bounds.
  458. let page_info = unsafe { pages.add(page_index) };
  459. // SAFETY: The caller guarantees that this page is in the "in use" state for the
  460. // duration of this call to `iterate`, so nobody will change the page.
  461. let page = unsafe { PageInfo::get_page(page_info) };
  462. if page.is_none() {
  463. pr_warn!("Page is null!");
  464. }
  465. let page = page.ok_or(EFAULT)?;
  466. cb(page, offset, available)?;
  467. size -= available;
  468. page_index += 1;
  469. offset = 0;
  470. }
  471. Ok(())
  472. }
  473. /// Copy from userspace into this page range.
  474. ///
  475. /// # Safety
  476. ///
  477. /// All pages touched by this operation must be in use for the duration of this call.
  478. pub(crate) unsafe fn copy_from_user_slice(
  479. &self,
  480. reader: &mut UserSliceReader,
  481. offset: usize,
  482. size: usize,
  483. ) -> Result {
  484. // SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
  485. unsafe {
  486. self.iterate(offset, size, |page, offset, to_copy| {
  487. page.copy_from_user_slice_raw(reader, offset, to_copy)
  488. })
  489. }
  490. }
  491. /// Copy from this page range into kernel space.
  492. ///
  493. /// # Safety
  494. ///
  495. /// All pages touched by this operation must be in use for the duration of this call.
  496. pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
  497. let mut out = MaybeUninit::<T>::uninit();
  498. let mut out_offset = 0;
  499. // SAFETY: `self.iterate` has the same safety requirements as `read`.
  500. unsafe {
  501. self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
  502. // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
  503. let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
  504. // SAFETY: The pointer points is in-bounds of the `out` variable, so it is valid.
  505. page.read_raw(obj_ptr, offset, to_copy)?;
  506. out_offset += to_copy;
  507. Ok(())
  508. })?;
  509. }
  510. // SAFETY: We just initialised the data.
  511. Ok(unsafe { out.assume_init() })
  512. }
  513. /// Copy from kernel space into this page range.
  514. ///
  515. /// # Safety
  516. ///
  517. /// All pages touched by this operation must be in use for the duration of this call.
  518. pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
  519. let mut obj_offset = 0;
  520. // SAFETY: `self.iterate` has the same safety requirements as `write`.
  521. unsafe {
  522. self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
  523. // SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
  524. let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
  525. // SAFETY: We have a reference to the object, so the pointer is valid.
  526. page.write_raw(obj_ptr, offset, to_copy)?;
  527. obj_offset += to_copy;
  528. Ok(())
  529. })
  530. }
  531. }
  532. /// Write zeroes to the given range.
  533. ///
  534. /// # Safety
  535. ///
  536. /// All pages touched by this operation must be in use for the duration of this call.
  537. pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
  538. // SAFETY: `self.iterate` has the same safety requirements as `copy_into`.
  539. unsafe {
  540. self.iterate(offset, size, |page, offset, len| {
  541. page.fill_zero_raw(offset, len)
  542. })
  543. }
  544. }
  545. }
  546. #[pinned_drop]
  547. impl PinnedDrop for ShrinkablePageRange {
  548. fn drop(self: Pin<&mut Self>) {
  549. let (pages, size) = {
  550. let lock = self.lock.lock();
  551. (lock.pages, lock.size)
  552. };
  553. if size == 0 {
  554. return;
  555. }
  556. // Note: This call is also necessary for the safety of `stable_trylock_mm`.
  557. let mm_lock = self.mm_lock.lock();
  558. // This is the destructor, so unlike the other methods, we only need to worry about races
  559. // with the shrinker here. Since we hold the `mm_lock`, we also can't race with the
  560. // shrinker, and after this loop, the shrinker will not access any of our pages since we
  561. // removed them from the lru list.
  562. for i in 0..size {
  563. // SAFETY: Loop is in-bounds of the size.
  564. let p_ptr = unsafe { pages.add(i) };
  565. // SAFETY: No other readers, so we can read.
  566. if let Some(p) = unsafe { PageInfo::get_page(p_ptr) } {
  567. // SAFETY: The pointer is valid and it's the right shrinker.
  568. unsafe { PageInfo::list_lru_del(p_ptr, p.nid(), self.shrinker) };
  569. }
  570. }
  571. drop(mm_lock);
  572. // SAFETY: `pages` was allocated as an `KVVec<PageInfo>` with capacity `size`. Furthermore,
  573. // all `size` elements are initialized. Also, the array is no longer shared with the
  574. // shrinker due to the above loop.
  575. drop(unsafe { KVVec::from_raw_parts(pages, size, size) });
  576. }
  577. }
  578. /// # Safety
  579. /// Called by the shrinker.
  580. #[no_mangle]
  581. unsafe extern "C" fn rust_shrink_count(
  582. shrink: *mut bindings::shrinker,
  583. _sc: *mut bindings::shrink_control,
  584. ) -> c_ulong {
  585. // SAFETY: We can access our own private data.
  586. let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
  587. // SAFETY: Accessing the lru list is okay. Just an FFI call.
  588. unsafe { bindings::list_lru_count(list_lru) }
  589. }
  590. /// # Safety
  591. /// Called by the shrinker.
  592. #[no_mangle]
  593. unsafe extern "C" fn rust_shrink_scan(
  594. shrink: *mut bindings::shrinker,
  595. sc: *mut bindings::shrink_control,
  596. ) -> c_ulong {
  597. // SAFETY: We can access our own private data.
  598. let list_lru = unsafe { (*shrink).private_data.cast::<bindings::list_lru>() };
  599. // SAFETY: Caller guarantees that it is safe to read this field.
  600. let nr_to_scan = unsafe { (*sc).nr_to_scan };
  601. // SAFETY: Accessing the lru list is okay. Just an FFI call.
  602. unsafe {
  603. bindings::list_lru_walk(
  604. list_lru,
  605. Some(bindings::rust_shrink_free_page_wrap),
  606. ptr::null_mut(),
  607. nr_to_scan,
  608. )
  609. }
  610. }
/// Returned by `rust_shrink_free_page` when one of its trylocks fails and the item is left on
/// the lru list.
const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
/// Returned by `rust_shrink_free_page` after it has isolated the item and released the lru lock.
///
/// Note that, despite the name, the value is the binding's `LRU_REMOVED_RETRY` status.
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
/// Lru walk callback that tries to free the page belonging to `item`.
///
/// `item` is the `lru` field of a `PageInfo`. The mm reference, the range's `mm_lock`, the mmap
/// read lock and the range's spinlock are all acquired with non-blocking operations; if any of
/// them is unavailable the item is left alone and `LRU_SKIP` is returned. Otherwise the item is
/// isolated from the lru, its page is taken out of the `PageInfo`, the lru lock is released, the
/// page is unmapped from the vma (if the vma still passes `check_vma`) and finally freed.
///
/// # Safety
/// Called by the shrinker.
#[no_mangle]
unsafe extern "C" fn rust_shrink_free_page(
    item: *mut bindings::list_head,
    lru: *mut bindings::list_lru_one,
    _cb_arg: *mut c_void,
) -> bindings::lru_status {
    // Fields that should survive after unlocking the lru lock.
    let page;
    let page_index;
    let mm;
    let mmap_read;
    let mm_mutex;
    let vma_addr;
    let range_ptr;
    {
        // CAST: The `list_head` field is first in `PageInfo`.
        let info = item as *mut PageInfo;
        // SAFETY: The `range` field of `PageInfo` is immutable.
        range_ptr = unsafe { (*info).range };
        // SAFETY: The `range` outlives its `PageInfo` values.
        let range = unsafe { &*range_ptr };
        // Every acquisition below is a non-blocking attempt; on failure the values acquired so
        // far are released again by the early return.
        mm = match range.mm.mmget_not_zero() {
            Some(mm) => MmWithUser::into_mmput_async(mm),
            None => return LRU_SKIP,
        };
        mm_mutex = match range.stable_trylock_mm() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };
        mmap_read = match mm.mmap_read_trylock() {
            Some(guard) => guard,
            None => return LRU_SKIP,
        };
        // We can't lock it normally here, since we hold the lru lock.
        let inner = match range.lock.try_lock() {
            Some(inner) => inner,
            None => return LRU_SKIP,
        };
        // SAFETY: The item is in this lru list, so it's okay to remove it.
        unsafe { bindings::list_lru_isolate(lru, item) };
        // SAFETY: Both pointers are in bounds of the same allocation.
        page_index = unsafe { info.offset_from(inner.pages) } as usize;
        // SAFETY: We hold the spinlock, so we can take the page.
        //
        // This sets the page pointer to zero before we unmap it from the vma. However, we call
        // `zap_page_range_single` before we release the mmap lock, so `use_page_slow` will not be
        // able to insert a new page until after our call to `zap_page_range_single`.
        page = unsafe { PageInfo::take_page(info) };
        vma_addr = inner.vma_addr;
        // From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
        // they can be freed at any point after we unlock `lru_lock`. This is with the exception of
        // `mm_mutex` which is kept alive by holding the lock.
    }
    // SAFETY: The lru lock is locked when this method is called.
    unsafe { bindings::spin_unlock(&raw mut (*lru).lock) };
    // `range_ptr` is only compared against the vma's private data here, never dereferenced.
    if let Some(unchecked_vma) = mmap_read.vma_lookup(vma_addr) {
        if let Some(vma) = check_vma(unchecked_vma, range_ptr) {
            let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
            vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
        }
    }
    // Release everything explicitly; the page itself is dropped last, after it has been unmapped
    // and all locks have been released.
    drop(mmap_read);
    drop(mm_mutex);
    drop(mm);
    drop(page);
    LRU_REMOVED_ENTRY
}