page.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/memblock.h>
  3. #include <linux/compiler.h>
  4. #include <linux/fs.h>
  5. #include <linux/init.h>
  6. #include <linux/ksm.h>
  7. #include <linux/mm.h>
  8. #include <linux/mmzone.h>
  9. #include <linux/huge_mm.h>
  10. #include <linux/proc_fs.h>
  11. #include <linux/seq_file.h>
  12. #include <linux/hugetlb.h>
  13. #include <linux/memremap.h>
  14. #include <linux/memcontrol.h>
  15. #include <linux/mmu_notifier.h>
  16. #include <linux/page_idle.h>
  17. #include <linux/kernel-page-flags.h>
  18. #include <linux/uaccess.h>
  19. #include "internal.h"
  20. #define KPMSIZE sizeof(u64)
  21. #define KPMMASK (KPMSIZE - 1)
  22. enum kpage_operation {
  23. KPAGE_FLAGS,
  24. KPAGE_COUNT,
  25. KPAGE_CGROUP,
  26. };
  27. static inline unsigned long get_max_dump_pfn(void)
  28. {
  29. #ifdef CONFIG_SPARSEMEM
  30. /*
  31. * The memmap of early sections is completely populated and marked
  32. * online even if max_pfn does not fall on a section boundary -
  33. * pfn_to_online_page() will succeed on all pages. Allow inspecting
  34. * these memmaps.
  35. */
  36. return round_up(max_pfn, PAGES_PER_SECTION);
  37. #else
  38. return max_pfn;
  39. #endif
  40. }
  41. static u64 get_kpage_count(const struct page *page)
  42. {
  43. struct page_snapshot ps;
  44. u64 ret;
  45. snapshot_page(&ps, page);
  46. if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
  47. ret = folio_precise_page_mapcount(&ps.folio_snapshot,
  48. &ps.page_snapshot);
  49. else
  50. ret = folio_average_page_mapcount(&ps.folio_snapshot);
  51. return ret;
  52. }
  53. static ssize_t kpage_read(struct file *file, char __user *buf,
  54. size_t count, loff_t *ppos,
  55. enum kpage_operation op)
  56. {
  57. const unsigned long max_dump_pfn = get_max_dump_pfn();
  58. u64 __user *out = (u64 __user *)buf;
  59. struct page *page;
  60. unsigned long src = *ppos;
  61. unsigned long pfn;
  62. ssize_t ret = 0;
  63. u64 info;
  64. pfn = src / KPMSIZE;
  65. if (src & KPMMASK || count & KPMMASK)
  66. return -EINVAL;
  67. if (src >= max_dump_pfn * KPMSIZE)
  68. return 0;
  69. count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
  70. while (count > 0) {
  71. /*
  72. * TODO: ZONE_DEVICE support requires to identify
  73. * memmaps that were actually initialized.
  74. */
  75. page = pfn_to_online_page(pfn);
  76. if (page) {
  77. switch (op) {
  78. case KPAGE_FLAGS:
  79. info = stable_page_flags(page);
  80. break;
  81. case KPAGE_COUNT:
  82. info = get_kpage_count(page);
  83. break;
  84. case KPAGE_CGROUP:
  85. info = page_cgroup_ino(page);
  86. break;
  87. default:
  88. info = 0;
  89. break;
  90. }
  91. } else
  92. info = 0;
  93. if (put_user(info, out)) {
  94. ret = -EFAULT;
  95. break;
  96. }
  97. pfn++;
  98. out++;
  99. count -= KPMSIZE;
  100. cond_resched();
  101. }
  102. *ppos += (char __user *)out - buf;
  103. if (!ret)
  104. ret = (char __user *)out - buf;
  105. return ret;
  106. }
  107. /* /proc/kpagecount - an array exposing page mapcounts
  108. *
  109. * Each entry is a u64 representing the corresponding
  110. * physical page mapcount.
  111. */
  112. static ssize_t kpagecount_read(struct file *file, char __user *buf,
  113. size_t count, loff_t *ppos)
  114. {
  115. return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
  116. }
  117. static const struct proc_ops kpagecount_proc_ops = {
  118. .proc_flags = PROC_ENTRY_PERMANENT,
  119. .proc_lseek = mem_lseek,
  120. .proc_read = kpagecount_read,
  121. };
  122. static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
  123. {
  124. return ((kflags >> kbit) & 1) << ubit;
  125. }
  126. u64 stable_page_flags(const struct page *page)
  127. {
  128. const struct folio *folio;
  129. struct page_snapshot ps;
  130. unsigned long k;
  131. unsigned long mapping;
  132. bool is_anon;
  133. u64 u = 0;
  134. /*
  135. * pseudo flag: KPF_NOPAGE
  136. * it differentiates a memory hole from a page with no flags
  137. */
  138. if (!page)
  139. return 1 << KPF_NOPAGE;
  140. snapshot_page(&ps, page);
  141. folio = &ps.folio_snapshot;
  142. k = folio->flags.f;
  143. mapping = (unsigned long)folio->mapping;
  144. is_anon = mapping & FOLIO_MAPPING_ANON;
  145. /*
  146. * pseudo flags for the well known (anonymous) memory mapped pages
  147. */
  148. if (folio_mapped(folio))
  149. u |= 1 << KPF_MMAP;
  150. if (is_anon) {
  151. u |= 1 << KPF_ANON;
  152. if (mapping & FOLIO_MAPPING_KSM)
  153. u |= 1 << KPF_KSM;
  154. }
  155. /*
  156. * compound pages: export both head/tail info
  157. * they together define a compound page's start/end pos and order
  158. */
  159. if (ps.idx == 0)
  160. u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
  161. else
  162. u |= 1 << KPF_COMPOUND_TAIL;
  163. if (folio_test_hugetlb(folio))
  164. u |= 1 << KPF_HUGE;
  165. else if (folio_test_large(folio) &&
  166. folio_test_large_rmappable(folio)) {
  167. /* Note: we indicate any THPs here, not just PMD-sized ones */
  168. u |= 1 << KPF_THP;
  169. } else if (is_huge_zero_pfn(ps.pfn)) {
  170. u |= 1 << KPF_ZERO_PAGE;
  171. u |= 1 << KPF_THP;
  172. } else if (is_zero_pfn(ps.pfn)) {
  173. u |= 1 << KPF_ZERO_PAGE;
  174. }
  175. if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY)
  176. u |= 1 << KPF_BUDDY;
  177. if (folio_test_offline(folio))
  178. u |= 1 << KPF_OFFLINE;
  179. if (folio_test_pgtable(folio))
  180. u |= 1 << KPF_PGTABLE;
  181. if (folio_test_slab(folio))
  182. u |= 1 << KPF_SLAB;
  183. #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
  184. u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
  185. #else
  186. if (ps.flags & PAGE_SNAPSHOT_PG_IDLE)
  187. u |= 1 << KPF_IDLE;
  188. #endif
  189. u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
  190. u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
  191. u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
  192. u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);
  193. u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
  194. u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
  195. u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
  196. u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
  197. #define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache))
  198. if ((k & SWAPCACHE) == SWAPCACHE)
  199. u |= 1 << KPF_SWAPCACHE;
  200. u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
  201. u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
  202. u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);
  203. #ifdef CONFIG_MEMORY_FAILURE
  204. if (u & (1 << KPF_HUGE))
  205. u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
  206. else
  207. u |= kpf_copy_bit(ps.page_snapshot.flags.f, KPF_HWPOISON, PG_hwpoison);
  208. #endif
  209. u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
  210. u |= kpf_copy_bit(k, KPF_OWNER_2, PG_owner_2);
  211. u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
  212. u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
  213. u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
  214. u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
  215. #ifdef CONFIG_ARCH_USES_PG_ARCH_2
  216. u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
  217. #endif
  218. #ifdef CONFIG_ARCH_USES_PG_ARCH_3
  219. u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
  220. #endif
  221. return u;
  222. }
  223. EXPORT_SYMBOL_GPL(stable_page_flags);
  224. /* /proc/kpageflags - an array exposing page flags
  225. *
  226. * Each entry is a u64 representing the corresponding
  227. * physical page flags.
  228. */
  229. static ssize_t kpageflags_read(struct file *file, char __user *buf,
  230. size_t count, loff_t *ppos)
  231. {
  232. return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
  233. }
  234. static const struct proc_ops kpageflags_proc_ops = {
  235. .proc_flags = PROC_ENTRY_PERMANENT,
  236. .proc_lseek = mem_lseek,
  237. .proc_read = kpageflags_read,
  238. };
  239. #ifdef CONFIG_MEMCG
  240. static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
  241. size_t count, loff_t *ppos)
  242. {
  243. return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
  244. }
  245. static const struct proc_ops kpagecgroup_proc_ops = {
  246. .proc_flags = PROC_ENTRY_PERMANENT,
  247. .proc_lseek = mem_lseek,
  248. .proc_read = kpagecgroup_read,
  249. };
  250. #endif /* CONFIG_MEMCG */
  251. static int __init proc_page_init(void)
  252. {
  253. proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
  254. proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
  255. #ifdef CONFIG_MEMCG
  256. proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
  257. #endif
  258. return 0;
  259. }
  260. fs_initcall(proc_page_init);