page_vma_mapped.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/mm.h>
  3. #include <linux/rmap.h>
  4. #include <linux/hugetlb.h>
  5. #include <linux/swap.h>
  6. #include <linux/leafops.h>
  7. #include "internal.h"
  8. static inline bool not_found(struct page_vma_mapped_walk *pvmw)
  9. {
  10. page_vma_mapped_walk_done(pvmw);
  11. return false;
  12. }
  13. static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
  14. spinlock_t **ptlp)
  15. {
  16. bool is_migration;
  17. pte_t ptent;
  18. if (pvmw->flags & PVMW_SYNC) {
  19. /* Use the stricter lookup */
  20. pvmw->pte = pte_offset_map_lock(pvmw->vma->vm_mm, pvmw->pmd,
  21. pvmw->address, &pvmw->ptl);
  22. *ptlp = pvmw->ptl;
  23. return !!pvmw->pte;
  24. }
  25. is_migration = pvmw->flags & PVMW_MIGRATION;
  26. again:
  27. /*
  28. * It is important to return the ptl corresponding to pte,
  29. * in case *pvmw->pmd changes underneath us; so we need to
  30. * return it even when choosing not to lock, in case caller
  31. * proceeds to loop over next ptes, and finds a match later.
  32. * Though, in most cases, page lock already protects this.
  33. */
  34. pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd,
  35. pvmw->address, pmdvalp, ptlp);
  36. if (!pvmw->pte)
  37. return false;
  38. ptent = ptep_get(pvmw->pte);
  39. if (pte_none(ptent)) {
  40. return false;
  41. } else if (pte_present(ptent)) {
  42. if (is_migration)
  43. return false;
  44. } else if (!is_migration) {
  45. softleaf_t entry;
  46. /*
  47. * Handle un-addressable ZONE_DEVICE memory.
  48. *
  49. * We get here when we are trying to unmap a private
  50. * device page from the process address space. Such
  51. * page is not CPU accessible and thus is mapped as
  52. * a special swap entry, nonetheless it still does
  53. * count as a valid regular mapping for the page
  54. * (and is accounted as such in page maps count).
  55. *
  56. * So handle this special case as if it was a normal
  57. * page mapping ie lock CPU page table and return true.
  58. *
  59. * For more details on device private memory see HMM
  60. * (include/linux/hmm.h or mm/hmm.c).
  61. */
  62. entry = softleaf_from_pte(ptent);
  63. if (!softleaf_is_device_private(entry) &&
  64. !softleaf_is_device_exclusive(entry))
  65. return false;
  66. }
  67. spin_lock(*ptlp);
  68. if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
  69. pte_unmap_unlock(pvmw->pte, *ptlp);
  70. goto again;
  71. }
  72. pvmw->ptl = *ptlp;
  73. return true;
  74. }
  75. /**
  76. * check_pte - check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is
  77. * mapped at the @pvmw->pte
  78. * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range
  79. * for checking
  80. * @pte_nr: the number of small pages described by @pvmw->pte.
  81. *
  82. * page_vma_mapped_walk() found a place where pfn range is *potentially*
  83. * mapped. check_pte() has to validate this.
  84. *
  85. * pvmw->pte may point to empty PTE, swap PTE or PTE pointing to
  86. * arbitrary page.
  87. *
  88. * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
  89. * entry that points to [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
  90. *
  91. * If PVMW_MIGRATION flag is not set, returns true if pvmw->pte points to
  92. * [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages)
  93. *
  94. * Otherwise, return false.
  95. *
  96. */
  97. static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
  98. {
  99. unsigned long pfn;
  100. pte_t ptent = ptep_get(pvmw->pte);
  101. if (pvmw->flags & PVMW_MIGRATION) {
  102. const softleaf_t entry = softleaf_from_pte(ptent);
  103. if (!softleaf_is_migration(entry))
  104. return false;
  105. pfn = softleaf_to_pfn(entry);
  106. } else if (pte_present(ptent)) {
  107. pfn = pte_pfn(ptent);
  108. } else {
  109. const softleaf_t entry = softleaf_from_pte(ptent);
  110. /* Handle un-addressable ZONE_DEVICE memory */
  111. if (!softleaf_is_device_private(entry) &&
  112. !softleaf_is_device_exclusive(entry))
  113. return false;
  114. pfn = softleaf_to_pfn(entry);
  115. }
  116. if ((pfn + pte_nr - 1) < pvmw->pfn)
  117. return false;
  118. if (pfn > (pvmw->pfn + pvmw->nr_pages - 1))
  119. return false;
  120. return true;
  121. }
  122. /* Returns true if the two ranges overlap. Careful to not overflow. */
  123. static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
  124. {
  125. if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
  126. return false;
  127. if (pfn > pvmw->pfn + pvmw->nr_pages - 1)
  128. return false;
  129. return true;
  130. }
  131. static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
  132. {
  133. pvmw->address = (pvmw->address + size) & ~(size - 1);
  134. if (!pvmw->address)
  135. pvmw->address = ULONG_MAX;
  136. }
/**
 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
 * @pvmw->address
 * @pvmw: pointer to struct page_vma_mapped_walk. pfn, nr_pages, vma, address
 * and flags must be set. pmd, pte and ptl must be NULL.
 *
 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
 * adjusted if needed (for PTE-mapped THPs).
 *
 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
 * a loop to find all PTEs that map the THP.
 *
 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.
 *
 * Returns false if there are no more page table entries for the page in
 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
 *
 * If you need to stop the walk before page_vma_mapped_walk() returned false,
 * use page_vma_mapped_walk_done(). It will do the housekeeping.
 */
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long end;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t pmde;

	/* The only possible pmd mapping has been handled on last iteration */
	if (pvmw->pmd && !pvmw->pte)
		return not_found(pvmw);

	if (unlikely(is_vm_hugetlb_page(vma))) {
		struct hstate *hstate = hstate_vma(vma);
		unsigned long size = huge_page_size(hstate);
		/* The only possible mapping was handled on last iteration */
		if (pvmw->pte)
			return not_found(pvmw);
		/*
		 * All callers that get here will already hold the
		 * i_mmap_rwsem. Therefore, no additional locks need to be
		 * taken before calling hugetlb_walk().
		 */
		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
		if (!pvmw->pte)
			return false;

		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
		/* One hugetlb entry describes pages_per_huge_page() pfns. */
		if (!check_pte(pvmw, pages_per_huge_page(hstate)))
			return not_found(pvmw);
		return true;
	}

	end = vma_address_end(pvmw);
	/* A pte is already mapped: continue the scan just after it. */
	if (pvmw->pte)
		goto next_pte;
restart:
	/*
	 * Walk the page table levels from the top for pvmw->address.  A
	 * level that is not present is skipped by stepping the address to
	 * the next boundary of that level's size.
	 */
	do {
		pgd = pgd_offset(mm, pvmw->address);
		if (!pgd_present(*pgd)) {
			step_forward(pvmw, PGDIR_SIZE);
			continue;
		}
		p4d = p4d_offset(pgd, pvmw->address);
		if (!p4d_present(*p4d)) {
			step_forward(pvmw, P4D_SIZE);
			continue;
		}
		pud = pud_offset(p4d, pvmw->address);
		if (!pud_present(*pud)) {
			step_forward(pvmw, PUD_SIZE);
			continue;
		}

		pvmw->pmd = pmd_offset(pud, pvmw->address);
		/*
		 * Make sure the pmd value isn't cached in a register by the
		 * compiler and used as a stale value after we've observed a
		 * subsequent update.
		 */
		pmde = pmdp_get_lockless(pvmw->pmd);

		if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) {
			/* Re-read the pmd now that the pmd lock is held. */
			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
			pmde = *pvmw->pmd;
			if (!pmd_present(pmde)) {
				softleaf_t entry;

				/*
				 * A non-present pmd only matches when the
				 * caller asked for migration entries.
				 */
				if (!thp_migration_supported() ||
				    !(pvmw->flags & PVMW_MIGRATION))
					return not_found(pvmw);
				entry = softleaf_from_pmd(pmde);

				if (!softleaf_is_migration(entry) ||
				    !check_pmd(softleaf_to_pfn(entry), pvmw))
					return not_found(pvmw);
				/* Match: return with the pmd lock held. */
				return true;
			}
			if (likely(pmd_trans_huge(pmde))) {
				/* A present huge pmd is not a migration entry. */
				if (pvmw->flags & PVMW_MIGRATION)
					return not_found(pvmw);
				if (!check_pmd(pmd_pfn(pmde), pvmw))
					return not_found(pvmw);
				/* Match: return with the pmd lock held. */
				return true;
			}
			/* THP pmd was split under us: handle on pte level */
			spin_unlock(pvmw->ptl);
			pvmw->ptl = NULL;
		} else if (!pmd_present(pmde)) {
			const softleaf_t entry = softleaf_from_pmd(pmde);

			/*
			 * Device-private pmd entry: take the pmd lock and
			 * report a match.
			 * NOTE(review): unlike the branches above, this path
			 * neither re-reads the pmd under the lock nor calls
			 * check_pmd() - confirm that this is intended.
			 */
			if (softleaf_is_device_private(entry)) {
				pvmw->ptl = pmd_lock(mm, pvmw->pmd);
				return true;
			}

			/*
			 * If PVMW_SYNC, take and drop THP pmd lock so that we
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			if ((pvmw->flags & PVMW_SYNC) &&
			    thp_vma_suitable_order(vma, pvmw->address,
						   PMD_ORDER) &&
			    (pvmw->nr_pages >= HPAGE_PMD_NR)) {
				/* Block-local ptl; shadows the outer one. */
				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);

				spin_unlock(ptl);
			}
			step_forward(pvmw, PMD_SIZE);
			continue;
		}
		/*
		 * map_pte() returns false in two ways: with no pte mapped,
		 * in which case the walk is retried from the top; or with
		 * the pte mapped but not locked because that entry is of no
		 * interest, in which case the scan moves to the next pte.
		 */
		if (!map_pte(pvmw, &pmde, &ptl)) {
			if (!pvmw->pte)
				goto restart;
			goto next_pte;
		}
this_pte:
		/* pvmw->pte is mapped and pvmw->ptl is held here. */
		if (check_pte(pvmw, 1))
			return true;
next_pte:
		/* Advance one page at a time, skipping empty ptes. */
		do {
			pvmw->address += PAGE_SIZE;
			if (pvmw->address >= end)
				return not_found(pvmw);
			/* Did we cross page table boundary? */
			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
				/*
				 * Drop the lock (if held) and the mapping of
				 * the old page table, flag the crossing for
				 * the caller, and look the new address up
				 * from the top.
				 */
				if (pvmw->ptl) {
					spin_unlock(pvmw->ptl);
					pvmw->ptl = NULL;
				}
				pte_unmap(pvmw->pte);
				pvmw->pte = NULL;
				pvmw->flags |= PVMW_PGTABLE_CROSSED;
				goto restart;
			}
			pvmw->pte++;
		} while (pte_none(ptep_get(pvmw->pte)));

		/*
		 * map_pte() may have returned without taking the lock.  Take
		 * it now using the ptl it handed back, and check that the pmd
		 * has not changed since it was sampled; if it has, drop
		 * everything and start over.
		 */
		if (!pvmw->ptl) {
			spin_lock(ptl);
			if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
				pte_unmap_unlock(pvmw->pte, ptl);
				pvmw->pte = NULL;
				goto restart;
			}
			pvmw->ptl = ptl;
		}
		goto this_pte;
	} while (pvmw->address < end);

	return false;
}
  304. #ifdef CONFIG_MEMORY_FAILURE
  305. /**
  306. * page_mapped_in_vma - check whether a page is really mapped in a VMA
  307. * @page: the page to test
  308. * @vma: the VMA to test
  309. *
  310. * Return: The address the page is mapped at if the page is in the range
  311. * covered by the VMA and present in the page table. If the page is
  312. * outside the VMA or not present, returns -EFAULT.
  313. * Only valid for normal file or anonymous VMAs.
  314. */
  315. unsigned long page_mapped_in_vma(const struct page *page,
  316. struct vm_area_struct *vma)
  317. {
  318. const struct folio *folio = page_folio(page);
  319. struct page_vma_mapped_walk pvmw = {
  320. .pfn = page_to_pfn(page),
  321. .nr_pages = 1,
  322. .vma = vma,
  323. .flags = PVMW_SYNC,
  324. };
  325. pvmw.address = vma_address(vma, page_pgoff(folio, page), 1);
  326. if (pvmw.address == -EFAULT)
  327. goto out;
  328. if (!page_vma_mapped_walk(&pvmw))
  329. return -EFAULT;
  330. page_vma_mapped_walk_done(&pvmw);
  331. out:
  332. return pvmw.address;
  333. }
  334. #endif