swap.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _MM_SWAP_H
  3. #define _MM_SWAP_H
  4. #include <linux/atomic.h> /* for atomic_long_t */
  5. struct mempolicy;
  6. struct swap_iocb;
  7. extern int page_cluster;
  8. #ifdef CONFIG_THP_SWAP
  9. #define SWAPFILE_CLUSTER HPAGE_PMD_NR
  10. #define swap_entry_order(order) (order)
  11. #else
  12. #define SWAPFILE_CLUSTER 256
  13. #define swap_entry_order(order) 0
  14. #endif
  15. extern struct swap_info_struct *swap_info[];
  16. /*
  17. * We use this to track usage of a cluster. A cluster is a block of swap disk
  18. * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
  19. * free clusters are organized into a list. We fetch an entry from the list to
  20. * get a free cluster.
  21. *
  22. * The flags field determines if a cluster is free. This is
  23. * protected by cluster lock.
  24. */
  25. struct swap_cluster_info {
  26. spinlock_t lock; /*
  27. * Protect swap_cluster_info fields
  28. * other than list, and swap_info_struct->swap_map
  29. * elements corresponding to the swap cluster.
  30. */
  31. u16 count;
  32. u8 flags;
  33. u8 order;
  34. atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */
  35. struct list_head list;
  36. };
  /*
   * State flag of a swap cluster. Every cluster that is on a list must
   * carry a non-zero flag.
   */
  enum swap_cluster_flags {
  	/* Only for a cluster that is temporarily off-list */
  	CLUSTER_FLAG_NONE	= 0,
  	CLUSTER_FLAG_FREE	= 1,
  	CLUSTER_FLAG_NONFULL	= 2,
  	CLUSTER_FLAG_FRAG	= 3,
  	/* Clusters with the flags listed above are allocatable */
  	CLUSTER_FLAG_USABLE	= CLUSTER_FLAG_FRAG,
  	CLUSTER_FLAG_FULL	= 4,
  	CLUSTER_FLAG_DISCARD	= 5,
  	/* Keeps following the last real flag */
  	CLUSTER_FLAG_MAX,
  };
  49. #ifdef CONFIG_SWAP
  50. #include <linux/swapops.h> /* for swp_offset */
  51. #include <linux/blk_types.h> /* for bio_end_io_t */
  52. static inline unsigned int swp_cluster_offset(swp_entry_t entry)
  53. {
  54. return swp_offset(entry) % SWAPFILE_CLUSTER;
  55. }
  56. /*
  57. * Callers of all helpers below must ensure the entry, type, or offset is
  58. * valid, and protect the swap device with reference count or locks.
  59. */
  60. static inline struct swap_info_struct *__swap_type_to_info(int type)
  61. {
  62. struct swap_info_struct *si;
  63. si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
  64. VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
  65. return si;
  66. }
  67. static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
  68. {
  69. return __swap_type_to_info(swp_type(entry));
  70. }
  71. static inline struct swap_cluster_info *__swap_offset_to_cluster(
  72. struct swap_info_struct *si, pgoff_t offset)
  73. {
  74. VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
  75. VM_WARN_ON_ONCE(offset >= si->max);
  76. return &si->cluster_info[offset / SWAPFILE_CLUSTER];
  77. }
  78. static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
  79. {
  80. return __swap_offset_to_cluster(__swap_entry_to_info(entry),
  81. swp_offset(entry));
  82. }
  83. static __always_inline struct swap_cluster_info *__swap_cluster_lock(
  84. struct swap_info_struct *si, unsigned long offset, bool irq)
  85. {
  86. struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);
  87. /*
  88. * Nothing modifies swap cache in an IRQ context. All access to
  89. * swap cache is wrapped by swap_cache_* helpers, and swap cache
  90. * writeback is handled outside of IRQs. Swapin or swapout never
  91. * occurs in IRQ, and neither does in-place split or replace.
  92. *
  93. * Besides, modifying swap cache requires synchronization with
  94. * swap_map, which was never IRQ safe.
  95. */
  96. VM_WARN_ON_ONCE(!in_task());
  97. VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
  98. if (irq)
  99. spin_lock_irq(&ci->lock);
  100. else
  101. spin_lock(&ci->lock);
  102. return ci;
  103. }
  /**
   * swap_cluster_lock - Lock and return the swap cluster of given offset.
   * @si: swap device the cluster belongs to.
   * @offset: the swap entry offset, pointing to a valid slot.
   *
   * The cluster lock is taken with spin_lock(), i.e. IRQs are left as is.
   *
   * Context: The caller must ensure the offset is in the valid range and
   * protect the swap device with reference count or locks.
   * Return: Pointer to the locked swap cluster.
   */
  static inline struct swap_cluster_info *swap_cluster_lock(
  		struct swap_info_struct *si, unsigned long offset)
  {
  	struct swap_cluster_info *ci = __swap_cluster_lock(si, offset, false);

  	return ci;
  }
  117. static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
  118. const struct folio *folio, bool irq)
  119. {
  120. VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
  121. VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
  122. return __swap_cluster_lock(__swap_entry_to_info(folio->swap),
  123. swp_offset(folio->swap), irq);
  124. }
  /**
   * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
   * @folio: The folio.
   *
   * Locks and returns the swap cluster containing the swap entries of
   * @folio. All swap entries of a folio always live in one single cluster.
   * The folio must be locked, so that its swap entries cannot change and
   * the cluster cannot be freed.
   *
   * Context: Caller must ensure the folio is locked and in the swap cache.
   * Return: Pointer to the swap cluster.
   */
  static inline struct swap_cluster_info *swap_cluster_get_and_lock(
  		const struct folio *folio)
  {
  	struct swap_cluster_info *ci = __swap_cluster_get_and_lock(folio, false);

  	return ci;
  }
  /**
   * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
   * @folio: The folio.
   *
   * Same as swap_cluster_get_and_lock() but also disables IRQ.
   *
   * Context: Caller must ensure the folio is locked and in the swap cache.
   * Return: Pointer to the swap cluster.
   */
  static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
  		const struct folio *folio)
  {
  	struct swap_cluster_info *ci = __swap_cluster_get_and_lock(folio, true);

  	return ci;
  }
  156. static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
  157. {
  158. spin_unlock(&ci->lock);
  159. }
  160. static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
  161. {
  162. spin_unlock_irq(&ci->lock);
  163. }
  /*
   * Below are the core routines for doing swap for a folio.
   * All helpers require the folio to be locked. A locked folio
   * in the swap cache pins the swap entries / slots allocated to the
   * folio; swap relies heavily on the swap cache and folio lock for
   * synchronization.
   *
   * folio_alloc_swap(): the entry point for a folio to be swapped
   * out. It allocates swap slots and pins the slots with swap cache.
   * The slots start with a swap count of zero.
   *
   * folio_dup_swap(): increases the swap count of a folio, usually
   * while it gets unmapped and a swap entry is installed to replace
   * it (e.g., swap entry in page table). A swap slot with swap
   * count == 0 should only be increased by this helper.
   *
   * folio_put_swap(): does the opposite thing of folio_dup_swap().
   */
  182. int folio_alloc_swap(struct folio *folio);
  183. int folio_dup_swap(struct folio *folio, struct page *subpage);
  184. void folio_put_swap(struct folio *folio, struct page *subpage);
  185. /* For internal use */
  186. extern void swap_entries_free(struct swap_info_struct *si,
  187. struct swap_cluster_info *ci,
  188. unsigned long offset, unsigned int nr_pages);
  189. /* linux/mm/page_io.c */
  190. int sio_pool_init(void);
  191. struct swap_iocb;
  192. void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
  193. void __swap_read_unplug(struct swap_iocb *plug);
  /* Call __swap_read_unplug() on @plug, unless @plug is NULL. */
  static inline void swap_read_unplug(struct swap_iocb *plug)
  {
  	/* A non-NULL plug is the unlikely case */
  	if (likely(!plug))
  		return;

  	__swap_read_unplug(plug);
  }
  199. void swap_write_unplug(struct swap_iocb *sio);
  200. int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
  201. void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
  202. /* linux/mm/swap_state.c */
  203. extern struct address_space swap_space __read_mostly;
  204. static inline struct address_space *swap_address_space(swp_entry_t entry)
  205. {
  206. return &swap_space;
  207. }
  208. /*
  209. * Return the swap device position of the swap entry.
  210. */
  211. static inline loff_t swap_dev_pos(swp_entry_t entry)
  212. {
  213. return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
  214. }
  215. /**
  216. * folio_matches_swap_entry - Check if a folio matches a given swap entry.
  217. * @folio: The folio.
  218. * @entry: The swap entry to check against.
  219. *
  220. * Context: The caller should have the folio locked to ensure it's stable
  221. * and nothing will move it in or out of the swap cache.
  222. * Return: true or false.
  223. */
  224. static inline bool folio_matches_swap_entry(const struct folio *folio,
  225. swp_entry_t entry)
  226. {
  227. swp_entry_t folio_entry = folio->swap;
  228. long nr_pages = folio_nr_pages(folio);
  229. VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
  230. if (!folio_test_swapcache(folio))
  231. return false;
  232. VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
  233. return folio_entry.val == round_down(entry.val, nr_pages);
  234. }
  235. /*
  236. * All swap cache helpers below require the caller to ensure the swap entries
  237. * used are valid and stabilize the device by any of the following ways:
  238. * - Hold a reference by get_swap_device(): this ensures a single entry is
  239. * valid and increases the swap device's refcount.
  240. * - Locking a folio in the swap cache: this ensures the folio's swap entries
  241. * are valid and pinned, also implies reference to the device.
  242. * - Locking anything referencing the swap entry: e.g. PTL that protects
  243. * swap entries in the page table, similar to locking swap cache folio.
  244. * - See the comment of get_swap_device() for more complex usage.
  245. */
  246. bool swap_cache_has_folio(swp_entry_t entry);
  247. struct folio *swap_cache_get_folio(swp_entry_t entry);
  248. void *swap_cache_get_shadow(swp_entry_t entry);
  249. void swap_cache_del_folio(struct folio *folio);
  250. struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
  251. struct mempolicy *mpol, pgoff_t ilx,
  252. bool *alloced);
  253. /* Below helpers require the caller to lock and pass in the swap cluster. */
  254. void __swap_cache_add_folio(struct swap_cluster_info *ci,
  255. struct folio *folio, swp_entry_t entry);
  256. void __swap_cache_del_folio(struct swap_cluster_info *ci,
  257. struct folio *folio, swp_entry_t entry, void *shadow);
  258. void __swap_cache_replace_folio(struct swap_cluster_info *ci,
  259. struct folio *old, struct folio *new);
  260. void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents);
  261. void show_swap_cache_info(void);
  262. void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
  263. struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  264. struct vm_area_struct *vma, unsigned long addr,
  265. struct swap_iocb **plug);
  266. struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
  267. struct mempolicy *mpol, pgoff_t ilx);
  268. struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
  269. struct vm_fault *vmf);
  270. struct folio *swapin_folio(swp_entry_t entry, struct folio *folio);
  271. void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
  272. unsigned long addr);
  273. static inline unsigned int folio_swap_flags(struct folio *folio)
  274. {
  275. return __swap_entry_to_info(folio->swap)->flags;
  276. }
  277. /*
  278. * Return the count of contiguous swap entries that share the same
  279. * zeromap status as the starting entry. If is_zeromap is not NULL,
  280. * it will return the zeromap status of the starting entry.
  281. */
  282. static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
  283. bool *is_zeromap)
  284. {
  285. struct swap_info_struct *sis = __swap_entry_to_info(entry);
  286. unsigned long start = swp_offset(entry);
  287. unsigned long end = start + max_nr;
  288. bool first_bit;
  289. first_bit = test_bit(start, sis->zeromap);
  290. if (is_zeromap)
  291. *is_zeromap = first_bit;
  292. if (max_nr <= 1)
  293. return max_nr;
  294. if (first_bit)
  295. return find_next_zero_bit(sis->zeromap, end, start) - start;
  296. else
  297. return find_next_bit(sis->zeromap, end, start) - start;
  298. }
  299. static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
  300. {
  301. int i;
  302. /*
  303. * While allocating a large folio and doing mTHP swapin, we need to
  304. * ensure all entries are not cached, otherwise, the mTHP folio will
  305. * be in conflict with the folio in swap cache.
  306. */
  307. for (i = 0; i < max_nr; i++) {
  308. if (swap_cache_has_folio(entry))
  309. return i;
  310. entry.val++;
  311. }
  312. return i;
  313. }
  314. #else /* CONFIG_SWAP */
  315. struct swap_iocb;
  /*
   * !CONFIG_SWAP stub: there are no swap clusters, so nothing is locked and
   * NULL is returned. The parameter list mirrors the CONFIG_SWAP version of
   * swap_cluster_lock() (device and offset only, no irq flag) so that the
   * same call builds with either configuration.
   */
  static inline struct swap_cluster_info *swap_cluster_lock(
  		struct swap_info_struct *si, unsigned long offset)
  {
  	return NULL;
  }
  /*
   * !CONFIG_SWAP stub: nothing is locked and NULL is returned. @folio is
   * const-qualified like in the CONFIG_SWAP version, so callers holding a
   * const folio pointer build with either configuration.
   */
  static inline struct swap_cluster_info *swap_cluster_get_and_lock(
  		const struct folio *folio)
  {
  	return NULL;
  }
  /*
   * !CONFIG_SWAP stub: nothing is locked and NULL is returned. @folio is
   * const-qualified like in the CONFIG_SWAP version, so callers holding a
   * const folio pointer build with either configuration.
   */
  static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
  		const struct folio *folio)
  {
  	return NULL;
  }
  /* !CONFIG_SWAP stub: there is no cluster lock to release. */
  static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
  {
  }
  /* !CONFIG_SWAP stub: there is no cluster lock to release. */
  static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
  {
  }
  337. static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
  338. {
  339. return NULL;
  340. }
  /* !CONFIG_SWAP stub: swap allocation always fails with -EINVAL. */
  static inline int folio_alloc_swap(struct folio *folio)
  {
  	return -EINVAL;
  }
  /* !CONFIG_SWAP stub: always fails with -EINVAL. */
  static inline int folio_dup_swap(struct folio *folio, struct page *page)
  {
  	return -EINVAL;
  }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void folio_put_swap(struct folio *folio, struct page *page)
  {
  }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void swap_read_folio(struct folio *folio,
  				   struct swap_iocb **plug)
  {
  }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void swap_write_unplug(struct swap_iocb *sio)
  {
  }
  358. static inline struct address_space *swap_address_space(swp_entry_t entry)
  359. {
  360. return NULL;
  361. }
  362. static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
  363. {
  364. return false;
  365. }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void show_swap_cache_info(void)
  {
  }
  369. static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
  370. gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
  371. {
  372. return NULL;
  373. }
  374. static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
  375. struct vm_fault *vmf)
  376. {
  377. return NULL;
  378. }
  379. static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
  380. {
  381. return NULL;
  382. }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void swap_update_readahead(struct folio *folio,
  					 struct vm_area_struct *vma,
  					 unsigned long addr)
  {
  }
  /* !CONFIG_SWAP stub: nothing is written out, always returns 0. */
  static inline int swap_writeout(struct folio *folio,
  				struct swap_iocb **swap_plug)
  {
  	return 0;
  }
  392. static inline bool swap_cache_has_folio(swp_entry_t entry)
  393. {
  394. return false;
  395. }
  396. static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
  397. {
  398. return NULL;
  399. }
  400. static inline void *swap_cache_get_shadow(swp_entry_t entry)
  401. {
  402. return NULL;
  403. }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void swap_cache_del_folio(struct folio *folio)
  {
  }
  407. static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
  408. struct folio *folio, swp_entry_t entry, void *shadow)
  409. {
  410. }
  /* !CONFIG_SWAP stub: does nothing. */
  static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
  					      struct folio *old,
  					      struct folio *new)
  {
  }
  /* !CONFIG_SWAP stub: no swap device flags exist, always returns 0. */
  static inline unsigned int folio_swap_flags(struct folio *folio)
  {
  	return 0;
  }
  419. static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
  420. bool *has_zeromap)
  421. {
  422. return 0;
  423. }
  424. static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
  425. {
  426. return 0;
  427. }
  428. #endif /* CONFIG_SWAP */
  429. #endif /* _MM_SWAP_H */