tlb.h 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /* include/asm-generic/tlb.h
  3. *
  4. * Generic TLB shootdown code
  5. *
  6. * Copyright 2001 Red Hat, Inc.
  7. * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  8. *
  9. * Copyright 2011 Red Hat, Inc., Peter Zijlstra
  10. */
  11. #ifndef _ASM_GENERIC__TLB_H
  12. #define _ASM_GENERIC__TLB_H
  13. #include <linux/mmu_notifier.h>
  14. #include <linux/swap.h>
  15. #include <linux/hugetlb_inline.h>
  16. #include <asm/tlbflush.h>
  17. #include <asm/cacheflush.h>
  18. /*
  19. * Blindly accessing user memory from NMI context can be dangerous
  20. * if we're in the middle of switching the current user task or switching
  21. * the loaded mm.
  22. */
  #ifndef nmi_uaccess_okay
  #define nmi_uaccess_okay() (true)
  #endif
  26. #ifdef CONFIG_MMU
  27. /*
  28. * Generic MMU-gather implementation.
  29. *
  30. * The mmu_gather data structure is used by the mm code to implement the
  31. * correct and efficient ordering of freeing pages and TLB invalidations.
  32. *
  33. * This correct ordering is:
  34. *
  35. * 1) unhook page
  36. * 2) TLB invalidate page
  37. * 3) free page
  38. *
  39. * That is, we must never free a page before we have ensured there are no live
  40. * translations left to it. Otherwise it might be possible to observe (or
  41. * worse, change) the page content after it has been reused.
  42. *
  43. * The mmu_gather API consists of:
  44. *
  45. * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
  46. * tlb_finish_mmu()
  47. *
  48. * start and finish a mmu_gather
  49. *
  50. * Finish in particular will issue a (final) TLB invalidate and free
  51. * all (remaining) queued pages.
  52. *
  53. * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA
  54. *
  55. * Defaults to flushing at tlb_end_vma() to reset the range; helps when
  56. * there's large holes between the VMAs.
  57. *
  58. * - tlb_free_vmas()
  59. *
  60. * tlb_free_vmas() marks the start of unlinking of one or more vmas
  61. * and freeing page-tables.
  62. *
  63. * - tlb_remove_table()
  64. *
  65. * tlb_remove_table() is the basic primitive to free page-table directories
  66. * (__p*_free_tlb()). In its most primitive form it is an alias for
  67. * tlb_remove_page() below, for when page directories are pages and have no
  68. * additional constraints.
  69. *
  70. * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE.
  71. *
  72. * - tlb_remove_page() / tlb_remove_page_size()
  73. * - __tlb_remove_folio_pages() / __tlb_remove_page_size()
  74. * - __tlb_remove_folio_pages_size()
  75. *
  76. * __tlb_remove_folio_pages_size() is the basic primitive that queues pages
  77. * for freeing. It will return a boolean indicating if the queue is (now)
  78. * full and a call to tlb_flush_mmu() is required.
  79. *
  80. * tlb_remove_page() and tlb_remove_page_size() imply the call to
  81. * tlb_flush_mmu() when required and have no return value.
  82. *
  83. * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(),
  84. * however, instead of removing a single page, assume PAGE_SIZE and remove
  85. * the given number of consecutive pages that are all part of the
  86. * same (large) folio.
  87. *
  88. * - tlb_change_page_size()
  89. *
  90. * call before __tlb_remove_page*() to set the current page-size; implies a
  91. * possible tlb_flush_mmu() call.
  92. *
  93. * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
  94. *
  95. * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
  96. * related state, like the range)
  97. *
  98. * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
  99. * whatever pages are still batched.
  100. *
  101. * - mmu_gather::fullmm
  102. *
  103. * A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
  104. * the entire mm; this allows a number of optimizations.
  105. *
  106. * - We can ignore tlb_{start,end}_vma(); because we don't
  107. * care about ranges. Everything will be shot down.
  108. *
  109. * - (RISC) architectures that use ASIDs can cycle to a new ASID
  110. * and delay the invalidation until ASID space runs out.
  111. *
  112. * - mmu_gather::need_flush_all
  113. *
  114. * A flag that can be set by the arch code if it wants to force
  115. * flush the entire TLB irrespective of the range. For instance
  116. * x86-PAE needs this when changing top-level entries.
  117. *
  118. * And allows the architecture to provide and implement tlb_flush():
  119. *
  120. * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
  121. * use of:
  122. *
  123. * - mmu_gather::start / mmu_gather::end
  124. *
  125. * which provides the range that needs to be flushed to cover the pages to
  126. * be freed.
  127. *
  128. * - mmu_gather::freed_tables
  129. *
  130. * set when we freed page table pages
  131. *
  132. * - tlb_get_unmap_shift() / tlb_get_unmap_size()
  133. *
  134. * returns the smallest TLB entry size unmapped in this range.
  135. *
  136. * If an architecture does not provide tlb_flush() a default implementation
  137. * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
  138. * specified, in which case we'll default to flush_tlb_mm().
  139. *
  140. * Additionally there are a few opt-in features:
  141. *
  142. * MMU_GATHER_PAGE_SIZE
  143. *
  144. * This ensures we call tlb_flush() every time tlb_change_page_size() actually
  145. * changes the size and provides mmu_gather::page_size to tlb_flush().
  146. *
  147. * This might be useful if your architecture has size specific TLB
  148. * invalidation instructions.
  149. *
  150. * MMU_GATHER_TABLE_FREE
  151. *
  152. * This provides tlb_remove_table(), to be used instead of tlb_remove_page()
  153. * for page directories (__p*_free_tlb()).
  154. *
  155. * Useful if your architecture has non-page page directories.
  156. *
  157. * When used, an architecture is expected to provide __tlb_remove_table() or
  158. * use the generic __tlb_remove_table(), which does the actual freeing of these
  159. * pages.
  160. *
  161. * MMU_GATHER_RCU_TABLE_FREE
  162. *
  163. * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see
  164. * comment below).
  165. *
  166. * Useful if your architecture doesn't use IPIs for remote TLB invalidates
  167. * and therefore doesn't naturally serialize with software page-table walkers.
  168. *
  169. * MMU_GATHER_NO_FLUSH_CACHE
  170. *
  171. * Indicates the architecture has flush_cache_range() but it needs *NOT* be called
  172. * before unmapping a VMA.
  173. *
  174. * NOTE: strictly speaking we shouldn't have this knob and instead rely on
  175. * flush_cache_range() being a NOP, except Sparc64 seems to be
  176. * different here.
  177. *
  178. * MMU_GATHER_MERGE_VMAS
  179. *
  180. * Indicates the architecture wants to merge ranges over VMAs; typical when
  181. * multiple range invalidates are more expensive than a full invalidate.
  182. *
  183. * MMU_GATHER_NO_RANGE
  184. *
  185. * Use this if your architecture lacks an efficient flush_tlb_range(). This
  186. * option implies MMU_GATHER_MERGE_VMAS above.
  187. *
  188. * MMU_GATHER_NO_GATHER
  189. *
  190. * If the option is set the mmu_gather will not track individual pages for
  191. * delayed page free anymore. A platform that enables the option needs to
  192. * provide its own implementation of the __tlb_remove_page_size() function to
  193. * free pages.
  194. *
  195. * This is useful if your architecture already flushes TLB entries in the
  196. * various ptep_get_and_clear() functions.
  197. */
  198. #ifdef CONFIG_MMU_GATHER_TABLE_FREE
  /*
   * A batch of page-table pointers handled by the MMU_GATHER_TABLE_FREE code.
   *
   * @rcu:    only present with CONFIG_MMU_GATHER_RCU_TABLE_FREE
   * @nr:     entry counter for @tables (presumably the number of valid
   *          entries; confirm against the users in mm/mmu_gather.c)
   * @tables: flexible array of table pointers
   */
  struct mmu_table_batch {
  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
      struct rcu_head rcu;
  #endif
      unsigned int nr;
      void *tables[];
  };

  /* Number of table pointers that fit in one page after the batch header. */
  #define MAX_TABLE_BATCH \
      ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
  208. #ifndef CONFIG_HAVE_ARCH_TLB_REMOVE_TABLE
  /*
   * Generic __tlb_remove_table(): interpret @table as a struct ptdesc and
   * pass it to pagetable_dtor_free().
   */
  static inline void __tlb_remove_table(void *table)
  {
      pagetable_dtor_free((struct ptdesc *)table);
  }
  214. #endif
  /* Out-of-line tlb_remove_table(), used with CONFIG_MMU_GATHER_TABLE_FREE. */
  void tlb_remove_table(struct mmu_gather *tlb, void *table);
  216. #else /* !CONFIG_MMU_GATHER_TABLE_FREE */
  static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page);

  /*
   * Fallback used when MMU_GATHER_TABLE_FREE is not selected: the architecture
   * is assumed to use ordinary pages for its page directories, so they can go
   * through the regular page batching.
   *
   * Runs pagetable_dtor() on the descriptor and then queues its page with
   * tlb_remove_page().
   */
  static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
  {
      struct ptdesc *pt = (struct ptdesc *)table;

      pagetable_dtor(pt);
      tlb_remove_page(tlb, ptdesc_page(pt));
  }
  228. #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

  /*
   * An architecture that does not use the linux page-tables for hardware may
   * override tlb_needs_table_invalidate() to skip the TLBI when freeing page
   * tables. The default is to always invalidate.
   */
  #ifndef tlb_needs_table_invalidate
  #define tlb_needs_table_invalidate() (true)
  #endif

  void tlb_remove_table_sync_one(void);

  #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

  #ifdef tlb_needs_table_invalidate
  #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
  #endif

  /* No-op stub when RCU table freeing is not configured. */
  static inline void tlb_remove_table_sync_one(void)
  {
  }

  #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
  244. #ifndef CONFIG_MMU_GATHER_NO_GATHER
  /*
   * Number of page pointers kept in the on-stack mmu_gather itself; used when
   * no page can be allocated for a larger batch of page pointers.
   */
  #define MMU_GATHER_BUNDLE 8

  /*
   * One batch of pages queued in the mmu_gather.
   *
   * @next:          link to the following batch
   * @nr, @max:      entry counters for @encoded_pages (presumably "used" and
   *                 "capacity"; confirm against mm/mmu_gather.c)
   * @encoded_pages: flexible array of encoded page pointers
   */
  struct mmu_gather_batch {
      struct mmu_gather_batch *next;
      unsigned int nr;
      unsigned int max;
      struct encoded_page *encoded_pages[];
  };

  /* Number of page pointers that fit in one page after the batch header. */
  #define MAX_GATHER_BATCH \
      ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
  /*
   * Cap on the number of mmu_gather batches. Zapping a lot of memory during
   * unmapping on huge machines risks soft lockups on non-preemptible kernels;
   * freeing 10K pages at once should be safe even without a preemption point.
   */
  #define MAX_GATHER_BATCH_COUNT (10000UL / MAX_GATHER_BATCH)
  /*
   * Page-queueing primitives, implemented out of line. Per the API overview at
   * the top of this file, the boolean result tells the caller that the queue is
   * full and tlb_flush_mmu() is required.
   */
  bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
                              int page_size);
  bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
                                unsigned int nr_pages, bool delay_rmap);
  #ifdef CONFIG_SMP
  /*
   * Sets mmu_gather::delayed_rmap and evaluates to true. This has to be a
   * macro rather than an inline function because 'struct mmu_gather' is only
   * defined further down.
   */
  #define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
  void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
  #endif
  276. #endif
  /*
   * Fallback when nothing above defined tlb_delay_rmap(): rmap removal is never
   * delayed and tlb_flush_rmaps() does nothing.
   *
   * This is what S390 gets (it flushes remote TLBs synchronously), as well as
   * UP, which has no remote TLBs to flush and is not preemptible here because
   * everything happens under the page table lock.
   */
  #ifndef tlb_delay_rmap
  #define tlb_delay_rmap(tlb) (false)
  static inline void tlb_flush_rmaps(struct mmu_gather *tlb,
                                     struct vm_area_struct *vma)
  {
  }
  #endif
  288. /*
  289. * struct mmu_gather is an opaque type used by the mm code for passing around
  290. * any data needed by arch specific code for tlb_remove_page.
  291. */
  292. struct mmu_gather {
  293. struct mm_struct *mm;
  294. #ifdef CONFIG_MMU_GATHER_TABLE_FREE
  295. struct mmu_table_batch *batch;
  296. #endif
  297. unsigned long start;
  298. unsigned long end;
  299. /*
  300. * we are in the middle of an operation to clear
  301. * a full mm and can make some optimizations
  302. */
  303. unsigned int fullmm : 1;
  304. /*
  305. * we have performed an operation which
  306. * requires a complete flush of the tlb
  307. */
  308. unsigned int need_flush_all : 1;
  309. /*
  310. * we have removed page directories
  311. */
  312. unsigned int freed_tables : 1;
  313. /*
  314. * Do we have pending delayed rmap removals?
  315. */
  316. unsigned int delayed_rmap : 1;
  317. /*
  318. * at which levels have we cleared entries?
  319. */
  320. unsigned int cleared_ptes : 1;
  321. unsigned int cleared_pmds : 1;
  322. unsigned int cleared_puds : 1;
  323. unsigned int cleared_p4ds : 1;
  324. /*
  325. * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
  326. */
  327. unsigned int vma_exec : 1;
  328. unsigned int vma_huge : 1;
  329. unsigned int vma_pfn : 1;
  330. /*
  331. * Did we unshare (unmap) any shared page tables? For now only
  332. * used for hugetlb PMD table sharing.
  333. */
  334. unsigned int unshared_tables : 1;
  335. /*
  336. * Did we unshare any page tables such that they are now exclusive
  337. * and could get reused+modified by the new owner? When setting this
  338. * flag, "unshared_tables" will be set as well. For now only used
  339. * for hugetlb PMD table sharing.
  340. */
  341. unsigned int fully_unshared_tables : 1;
  342. unsigned int batch_count;
  343. #ifndef CONFIG_MMU_GATHER_NO_GATHER
  344. struct mmu_gather_batch *active;
  345. struct mmu_gather_batch local;
  346. struct page *__pages[MMU_GATHER_BUNDLE];
  347. #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
  348. unsigned int page_size;
  349. #endif
  350. #endif
  351. };
  /*
   * Per the API overview at the top of this file: performs the TLB invalidate
   * and also frees whatever pages are still batched.
   */
  void tlb_flush_mmu(struct mmu_gather *tlb);
  353. static inline void __tlb_adjust_range(struct mmu_gather *tlb,
  354. unsigned long address,
  355. unsigned int range_size)
  356. {
  357. tlb->start = min(tlb->start, address);
  358. tlb->end = max(tlb->end, address + range_size);
  359. }
  360. static inline void __tlb_reset_range(struct mmu_gather *tlb)
  361. {
  362. if (tlb->fullmm) {
  363. tlb->start = tlb->end = ~0;
  364. } else {
  365. tlb->start = TASK_SIZE;
  366. tlb->end = 0;
  367. }
  368. tlb->freed_tables = 0;
  369. tlb->cleared_ptes = 0;
  370. tlb->cleared_pmds = 0;
  371. tlb->cleared_puds = 0;
  372. tlb->cleared_p4ds = 0;
  373. tlb->unshared_tables = 0;
  374. /*
  375. * Do not reset mmu_gather::vma_* fields here, we do not
  376. * call into tlb_start_vma() again to set them if there is an
  377. * intermediate flush.
  378. */
  379. }
  380. #ifdef CONFIG_MMU_GATHER_NO_RANGE
  381. #if defined(tlb_flush)
  382. #error MMU_GATHER_NO_RANGE relies on default tlb_flush()
  383. #endif
  384. /*
  385. * When an architecture does not have efficient means of range flushing TLBs
  386. * there is no point in doing intermediate flushes on tlb_end_vma() to keep the
  387. * range small. We equally don't have to worry about page granularity or other
  388. * things.
  389. *
  390. * All we need to do is issue a full flush for any !0 range.
  391. */
  392. static inline void tlb_flush(struct mmu_gather *tlb)
  393. {
  394. if (tlb->end)
  395. flush_tlb_mm(tlb->mm);
  396. }
  397. #else /* CONFIG_MMU_GATHER_NO_RANGE */
  398. #ifndef tlb_flush
  399. /*
  400. * When an architecture does not provide its own tlb_flush() implementation
  401. * but does have a reasonably efficient flush_vma_range() implementation
  402. * use that.
  403. */
  404. static inline void tlb_flush(struct mmu_gather *tlb)
  405. {
  406. if (tlb->fullmm || tlb->need_flush_all) {
  407. flush_tlb_mm(tlb->mm);
  408. } else if (tlb->end) {
  409. struct vm_area_struct vma = {
  410. .vm_mm = tlb->mm,
  411. .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) |
  412. (tlb->vma_huge ? VM_HUGETLB : 0),
  413. };
  414. flush_tlb_range(&vma, tlb->start, tlb->end);
  415. }
  416. }
  417. #endif
  418. #endif /* CONFIG_MMU_GATHER_NO_RANGE */
  419. static inline void
  420. tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
  421. {
  422. /*
  423. * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
  424. * mips-4k) flush only large pages.
  425. *
  426. * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
  427. * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
  428. * range.
  429. *
  430. * We rely on tlb_end_vma() to issue a flush, such that when we reset
  431. * these values the batch is empty.
  432. */
  433. tlb->vma_huge = is_vm_hugetlb_page(vma);
  434. tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
  435. /*
  436. * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma
  437. * in the tracked range, see tlb_free_vmas().
  438. */
  439. tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
  440. }
  441. static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
  442. {
  443. /*
  444. * Anything calling __tlb_adjust_range() also sets at least one of
  445. * these bits.
  446. */
  447. if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
  448. tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
  449. return;
  450. tlb_flush(tlb);
  451. __tlb_reset_range(tlb);
  452. }
  /*
   * Queue @page via __tlb_remove_page_size() and call tlb_flush_mmu() when
   * that reports true.
   */
  static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                          struct page *page, int page_size)
  {
      bool need_flush = __tlb_remove_page_size(tlb, page, page_size);

      if (need_flush)
          tlb_flush_mmu(tlb);
  }
  459. static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
  460. {
  461. return tlb_remove_page_size(tlb, page, PAGE_SIZE);
  462. }
  /* Typed convenience wrapper: forwards the ptdesc to tlb_remove_table(). */
  static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt)
  {
      void *table = pt;

      tlb_remove_table(tlb, table);
  }
  /*
   * Set the page size used for subsequent page removals.
   *
   * Only does anything with CONFIG_MMU_GATHER_PAGE_SIZE. If a non-zero page
   * size was already recorded and differs from @page_size, tlb_flush_mmu() is
   * called first, unless this is a full-mm gather or need_flush_all is set.
   */
  static inline void tlb_change_page_size(struct mmu_gather *tlb,
                                          unsigned int page_size)
  {
  #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
      const unsigned int prev_size = tlb->page_size;

      if (prev_size && prev_size != page_size &&
          !tlb->fullmm && !tlb->need_flush_all)
          tlb_flush_mmu(tlb);

      tlb->page_size = page_size;
  #endif
  }
  478. static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
  479. {
  480. if (tlb->cleared_ptes)
  481. return PAGE_SHIFT;
  482. if (tlb->cleared_pmds)
  483. return PMD_SHIFT;
  484. if (tlb->cleared_puds)
  485. return PUD_SHIFT;
  486. if (tlb->cleared_p4ds)
  487. return P4D_SHIFT;
  488. return PAGE_SHIFT;
  489. }
  /* Size in bytes corresponding to tlb_get_unmap_shift(). */
  static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
  {
      const unsigned long shift = tlb_get_unmap_shift(tlb);

      return 1UL << shift;
  }
  494. /*
  495. * In the case of tlb vma handling, we can optimise these away in the
  496. * case where we're doing a full MM flush. When we're doing a munmap,
  497. * the vmas are adjusted to only cover the region to be torn down.
  498. */
  499. static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
  500. {
  501. if (tlb->fullmm)
  502. return;
  503. tlb_update_vma_flags(tlb, vma);
  504. #ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
  505. flush_cache_range(vma, vma->vm_start, vma->vm_end);
  506. #endif
  507. }
  508. static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
  509. {
  510. if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS))
  511. return;
  512. /*
  513. * Do a TLB flush and reset the range at VMA boundaries; this avoids
  514. * the ranges growing with the unused space between consecutive VMAs,
  515. * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
  516. * this.
  517. */
  518. tlb_flush_mmu_tlbonly(tlb);
  519. }
  520. static inline void tlb_free_vmas(struct mmu_gather *tlb)
  521. {
  522. if (tlb->fullmm)
  523. return;
  524. /*
  525. * VM_PFNMAP is more fragile because the core mm will not track the
  526. * page mapcount -- there might not be page-frames for these PFNs
  527. * after all.
  528. *
  529. * Specifically() there is a race between munmap() and
  530. * unmap_mapping_range(), where munmap() will unlink the VMA, such
  531. * that unmap_mapping_range() will no longer observe the VMA and
  532. * no-op, without observing the TLBI, returning prematurely.
  533. *
  534. * So if we're about to unlink such a VMA, and we have pending
  535. * TLBI for such a vma, flush things now.
  536. */
  537. if (tlb->vma_pfn)
  538. tlb_flush_mmu_tlbonly(tlb);
  539. }
  540. /*
  541. * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
  542. * and set corresponding cleared_*.
  543. */
  544. static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
  545. unsigned long address, unsigned long size)
  546. {
  547. __tlb_adjust_range(tlb, address, size);
  548. tlb->cleared_ptes = 1;
  549. }
  550. static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
  551. unsigned long address, unsigned long size)
  552. {
  553. __tlb_adjust_range(tlb, address, size);
  554. tlb->cleared_pmds = 1;
  555. }
  556. static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
  557. unsigned long address, unsigned long size)
  558. {
  559. __tlb_adjust_range(tlb, address, size);
  560. tlb->cleared_puds = 1;
  561. }
  562. static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
  563. unsigned long address, unsigned long size)
  564. {
  565. __tlb_adjust_range(tlb, address, size);
  566. tlb->cleared_p4ds = 1;
  567. }
  568. #ifndef __tlb_remove_tlb_entry
  569. static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
  570. {
  571. }
  572. #endif
  /**
   * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
   *
   * Updates the range to note that a pte really was unmapped, which lets the
   * tlb invalidate be optimised away later when nothing was. That matters
   * because userspace unmaps already-unmapped pages quite a lot.
   */
  #define tlb_remove_tlb_entry(tlb, ptep, address)            \
      do {                                                    \
          tlb_flush_pte_range(tlb, address, PAGE_SIZE);       \
          __tlb_remove_tlb_entry(tlb, ptep, address);         \
      } while (0)
  585. /**
  586. * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for
  587. * later tlb invalidation.
  588. *
  589. * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple
  590. * consecutive ptes instead of only a single one.
  591. */
  592. static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb,
  593. pte_t *ptep, unsigned int nr, unsigned long address)
  594. {
  595. tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr);
  596. for (;;) {
  597. __tlb_remove_tlb_entry(tlb, ptep, address);
  598. if (--nr == 0)
  599. break;
  600. ptep++;
  601. address += PAGE_SIZE;
  602. }
  603. }
  /*
   * Record the unmapping of one huge page of hstate @h: the range is added at
   * the highest page-table level whose size does not exceed the huge page size
   * (falling back to the PTE level), then the per-entry hook is called.
   */
  #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)        \
      do {                                                        \
          unsigned long _sz = huge_page_size(h);                  \
                                                                  \
          if (_sz >= P4D_SIZE)                                    \
              tlb_flush_p4d_range(tlb, address, _sz);             \
          else if (_sz >= PUD_SIZE)                               \
              tlb_flush_pud_range(tlb, address, _sz);             \
          else if (_sz >= PMD_SIZE)                               \
              tlb_flush_pmd_range(tlb, address, _sz);             \
          else                                                    \
              tlb_flush_pte_range(tlb, address, _sz);             \
                                                                  \
          __tlb_remove_tlb_entry(tlb, ptep, address);             \
      } while (0)
  /**
   * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
   *
   * Adds an HPAGE_PMD_SIZE range at the PMD level, then calls the
   * __tlb_remove_pmd_tlb_entry() hook. The hook is a nop so far, because only
   * x86 needs it.
   */
  #ifndef __tlb_remove_pmd_tlb_entry
  #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
  #endif

  #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)            \
      do {                                                        \
          tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE);      \
          __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
      } while (0)
  /**
   * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
   * invalidation.
   *
   * Adds an HPAGE_PUD_SIZE range at the PUD level, then calls the
   * __tlb_remove_pud_tlb_entry() hook. The hook is a nop so far, because only
   * x86 needs it.
   */
  #ifndef __tlb_remove_pud_tlb_entry
  #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
  #endif

  #define tlb_remove_pud_tlb_entry(tlb, pudp, address)            \
      do {                                                        \
          tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE);      \
          __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
      } while (0)
  641. /*
  642. * For things like page tables caches (ie caching addresses "inside" the
  643. * page tables, like x86 does), for legacy reasons, flushing an
  644. * individual page had better flush the page table caches behind it. This
  645. * is definitely how x86 works, for example. And if you have an
  646. * architected non-legacy page table cache (which I'm not aware of
  647. * anybody actually doing), you're going to have some architecturally
  648. * explicit flushing for that, likely *separate* from a regular TLB entry
  649. * flush, and thus you'd need more than just some range expansion..
  650. *
  651. * So if we ever find an architecture
  652. * that would want something that odd, I think it is up to that
  653. * architecture to do its own odd thing, not cause pain for others
  654. * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
  655. *
  656. * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
  657. */
  658. #ifndef pte_free_tlb
  /*
   * pte_free_tlb - free a PTE table through the mmu_gather.
   *
   * Adds a PAGE_SIZE range at @address on the PMD level, sets
   * mmu_gather::freed_tables and hands the table to __pte_free_tlb().
   *
   * @tlb is parenthesized before the member access so that any pointer
   * expression (e.g. &gather) can be passed.
   */
  #define pte_free_tlb(tlb, ptep, address)                    \
      do {                                                    \
          tlb_flush_pmd_range(tlb, address, PAGE_SIZE);       \
          (tlb)->freed_tables = 1;                            \
          __pte_free_tlb(tlb, ptep, address);                 \
      } while (0)
  665. #endif
  666. #ifndef pmd_free_tlb
  /*
   * pmd_free_tlb - free a PMD table through the mmu_gather.
   *
   * Adds a PAGE_SIZE range at @address on the PUD level, sets
   * mmu_gather::freed_tables and hands the table to __pmd_free_tlb().
   *
   * @tlb is parenthesized before the member access so that any pointer
   * expression (e.g. &gather) can be passed.
   */
  #define pmd_free_tlb(tlb, pmdp, address)                    \
      do {                                                    \
          tlb_flush_pud_range(tlb, address, PAGE_SIZE);       \
          (tlb)->freed_tables = 1;                            \
          __pmd_free_tlb(tlb, pmdp, address);                 \
      } while (0)
  673. #endif
  674. #ifndef pud_free_tlb
  /*
   * pud_free_tlb - free a PUD table through the mmu_gather.
   *
   * Adds a PAGE_SIZE range at @address on the P4D level, sets
   * mmu_gather::freed_tables and hands the table to __pud_free_tlb().
   *
   * @tlb is parenthesized before the member access so that any pointer
   * expression (e.g. &gather) can be passed.
   */
  #define pud_free_tlb(tlb, pudp, address)                    \
      do {                                                    \
          tlb_flush_p4d_range(tlb, address, PAGE_SIZE);       \
          (tlb)->freed_tables = 1;                            \
          __pud_free_tlb(tlb, pudp, address);                 \
      } while (0)
  681. #endif
  682. #ifndef p4d_free_tlb
  /*
   * p4d_free_tlb - free a P4D table through the mmu_gather.
   *
   * Adds a PAGE_SIZE range at @address (without setting any cleared_* bit),
   * sets mmu_gather::freed_tables and hands the table to __p4d_free_tlb().
   *
   * @tlb is parenthesized before the member access so that any pointer
   * expression (e.g. &gather) can be passed.
   */
  #define p4d_free_tlb(tlb, p4dp, address)                    \
      do {                                                    \
          __tlb_adjust_range(tlb, address, PAGE_SIZE);        \
          (tlb)->freed_tables = 1;                            \
          __p4d_free_tlb(tlb, p4dp, address);                 \
      } while (0)
  689. #endif
  690. #ifndef pte_needs_flush
  691. static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
  692. {
  693. return true;
  694. }
  695. #endif
  696. #ifndef huge_pmd_needs_flush
  697. static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
  698. {
  699. return true;
  700. }
  701. #endif
  702. #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
  703. static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
  704. unsigned long addr)
  705. {
  706. /*
  707. * The caller must make sure that concurrent unsharing + exclusive
  708. * reuse is impossible until tlb_flush_unshared_tables() was called.
  709. */
  710. VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
  711. ptdesc_pmd_pts_dec(pt);
  712. /* Clearing a PUD pointing at a PMD table with PMD leaves. */
  713. tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
  714. /*
  715. * If the page table is now exclusively owned, we fully unshared
  716. * a page table.
  717. */
  718. if (!ptdesc_pmd_is_shared(pt))
  719. tlb->fully_unshared_tables = true;
  720. tlb->unshared_tables = true;
  721. }
  722. static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
  723. {
  724. /*
  725. * As soon as the caller drops locks to allow for reuse of
  726. * previously-shared tables, these tables could get modified and
  727. * even reused outside of hugetlb context, so we have to make sure that
  728. * any page table walkers (incl. TLB, GUP-fast) are aware of that
  729. * change.
  730. *
  731. * Even if we are not fully unsharing a PMD table, we must
  732. * flush the TLB for the unsharer now.
  733. */
  734. if (tlb->unshared_tables)
  735. tlb_flush_mmu_tlbonly(tlb);
  736. /*
  737. * Similarly, we must make sure that concurrent GUP-fast will not
  738. * walk previously-shared page tables that are getting modified+reused
  739. * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast.
  740. *
  741. * We only perform this when we are the last sharer of a page table,
  742. * as the IPI will reach all CPUs: any GUP-fast.
  743. *
  744. * Note that on configs where tlb_remove_table_sync_one() is a NOP,
  745. * the expectation is that the tlb_flush_mmu_tlbonly() would have issued
  746. * required IPIs already for us.
  747. */
  748. if (tlb->fully_unshared_tables) {
  749. tlb_remove_table_sync_one();
  750. tlb->fully_unshared_tables = false;
  751. }
  752. }
  753. #endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
  754. #endif /* CONFIG_MMU */
  755. #endif /* _ASM_GENERIC__TLB_H */