dup.h 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320
  1. /* SPDX-License-Identifier: GPL-2.0+ */
  2. #pragma once
  /* Forward declarations to avoid header cycle. */
  struct vm_area_struct;
  static inline void vma_start_write(struct vm_area_struct *vma);

  /* Declared here only; each of these is declared exactly once. */
  extern const struct vm_operations_struct vma_dummy_vm_ops;
  extern unsigned long stack_guard_gap;
  extern unsigned long rlimit(unsigned int limit);
  struct task_struct *get_current(void);

  #define MMF_HAS_MDWE 28

  /* 'current' expands to a call that returns the current task_struct. */
  #define current get_current()
  /*
   * The task command name length is an enum constant (rather than a macro) so
   * that it is visible to BPF programs.
   */
  enum {
      TASK_COMM_LEN = 16,
  };
  20. /* PARTIALLY implemented types. */
  21. struct mm_struct {
  22. struct maple_tree mm_mt;
  23. int map_count; /* number of VMAs */
  24. unsigned long total_vm; /* Total pages mapped */
  25. unsigned long locked_vm; /* Pages that have PG_mlocked set */
  26. unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
  27. unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
  28. unsigned long stack_vm; /* VM_STACK */
  29. unsigned long def_flags;
  30. mm_flags_t flags; /* Must use mm_flags_* helpers to access */
  31. };
  32. struct address_space {
  33. struct rb_root_cached i_mmap;
  34. unsigned long flags;
  35. atomic_t i_mmap_writable;
  36. };
  /* The two mapping hooks a file may provide. */
  struct file_operations {
      int (*mmap)(struct file *, struct vm_area_struct *);
      int (*mmap_prepare)(struct vm_area_desc *);
  };
  /* Minimal file object: its address_space and its operations table. */
  struct file {
      struct address_space *f_mapping;
      const struct file_operations *f_op;
  };
  45. struct anon_vma_chain {
  46. struct anon_vma *anon_vma;
  47. struct list_head same_vma;
  48. };
  49. struct task_struct {
  50. char comm[TASK_COMM_LEN];
  51. pid_t pid;
  52. struct mm_struct *mm;
  53. /* Used for emulating ABI behavior of previous Linux versions: */
  54. unsigned int personality;
  55. };
  56. struct kref {
  57. refcount_t refcount;
  58. };
  59. struct anon_vma_name {
  60. struct kref kref;
  61. /* The name needs to be at the end because it is dynamically sized. */
  62. char name[];
  63. };
  64. /*
  65. * Contains declarations that are DUPLICATED from kernel source in order to
  66. * facilitate userland VMA testing.
  67. *
  68. * These must be kept in sync with kernel source.
  69. */
  70. #define VMA_LOCK_OFFSET 0x40000000
  71. typedef struct { unsigned long v; } freeptr_t;
  72. #define VM_NONE 0x00000000
  73. typedef int __bitwise vma_flag_t;
  74. #define ACCESS_PRIVATE(p, member) ((p)->member)
  75. #define DECLARE_VMA_BIT(name, bitnum) \
  76. VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
  77. #define DECLARE_VMA_BIT_ALIAS(name, aliased) \
  78. VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
  79. enum {
  80. DECLARE_VMA_BIT(READ, 0),
  81. DECLARE_VMA_BIT(WRITE, 1),
  82. DECLARE_VMA_BIT(EXEC, 2),
  83. DECLARE_VMA_BIT(SHARED, 3),
  84. /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
  85. DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
  86. DECLARE_VMA_BIT(MAYWRITE, 5),
  87. DECLARE_VMA_BIT(MAYEXEC, 6),
  88. DECLARE_VMA_BIT(MAYSHARE, 7),
  89. DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
  90. #ifdef CONFIG_MMU
  91. DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
  92. #else
  93. /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
  94. DECLARE_VMA_BIT(MAYOVERLAY, 9),
  95. #endif /* CONFIG_MMU */
  96. /* Page-ranges managed without "struct page", just pure PFN */
  97. DECLARE_VMA_BIT(PFNMAP, 10),
  98. DECLARE_VMA_BIT(MAYBE_GUARD, 11),
  99. DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
  100. DECLARE_VMA_BIT(LOCKED, 13),
  101. DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
  102. DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
  103. DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
  104. DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
  105. DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
  106. DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
  107. DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
  108. DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
  109. DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
  110. DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
  111. DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
  112. DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
  113. DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
  114. DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
  115. DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
  116. DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
  117. DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
  118. DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
  119. /* These bits are reused, we define specific uses below. */
  120. DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
  121. DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
  122. DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
  123. DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
  124. DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
  125. DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
  126. DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
  127. /*
  128. * This flag is used to connect VFIO to arch specific KVM code. It
  129. * indicates that the memory under this VMA is safe for use with any
  130. * non-cachable memory type inside KVM. Some VFIO devices, on some
  131. * platforms, are thought to be unsafe and can cause machine crashes
  132. * if KVM does not lock down the memory type.
  133. */
  134. DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
  135. #ifdef CONFIG_PPC32
  136. DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
  137. #else
  138. DECLARE_VMA_BIT(DROPPABLE, 40),
  139. #endif
  140. DECLARE_VMA_BIT(UFFD_MINOR, 41),
  141. DECLARE_VMA_BIT(SEALED, 42),
  142. /* Flags that reuse flags above. */
  143. DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
  144. DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
  145. DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
  146. DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
  147. DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
  148. #if defined(CONFIG_X86_USER_SHADOW_STACK)
  149. /*
  150. * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
  151. * support core mm.
  152. *
  153. * These VMAs will get a single end guard page. This helps userspace
  154. * protect itself from attacks. A single page is enough for current
  155. * shadow stack archs (x86). See the comments near alloc_shstk() in
  156. * arch/x86/kernel/shstk.c for more details on the guard size.
  157. */
  158. DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
  159. #elif defined(CONFIG_ARM64_GCS)
  160. /*
  161. * arm64's Guarded Control Stack implements similar functionality and
  162. * has similar constraints to shadow stacks.
  163. */
  164. DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
  165. #endif
  166. DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
  167. DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
  168. DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
  169. DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
  170. DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
  171. DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
  172. DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
  173. DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
  174. #ifdef CONFIG_STACK_GROWSUP
  175. DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
  176. DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
  177. #else
  178. DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
  179. #endif
  180. };
  /* Build the VM_<name> mask from the VMA_<name>_BIT bit number via BIT(). */
  #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)

  #define VM_READ         INIT_VM_FLAG(READ)
  #define VM_WRITE        INIT_VM_FLAG(WRITE)
  #define VM_EXEC         INIT_VM_FLAG(EXEC)
  #define VM_SHARED       INIT_VM_FLAG(SHARED)
  #define VM_MAYREAD      INIT_VM_FLAG(MAYREAD)
  #define VM_MAYWRITE     INIT_VM_FLAG(MAYWRITE)
  #define VM_MAYEXEC      INIT_VM_FLAG(MAYEXEC)
  #define VM_MAYSHARE     INIT_VM_FLAG(MAYSHARE)
  #define VM_GROWSDOWN    INIT_VM_FLAG(GROWSDOWN)

  /* Bit 9 is UFFD_MISSING with an MMU and MAYOVERLAY without one. */
  #ifdef CONFIG_MMU
  #define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
  #else
  #define VM_UFFD_MISSING VM_NONE
  #define VM_MAYOVERLAY   INIT_VM_FLAG(MAYOVERLAY)
  #endif

  #define VM_PFNMAP       INIT_VM_FLAG(PFNMAP)
  #define VM_MAYBE_GUARD  INIT_VM_FLAG(MAYBE_GUARD)
  #define VM_UFFD_WP      INIT_VM_FLAG(UFFD_WP)
  #define VM_LOCKED       INIT_VM_FLAG(LOCKED)
  #define VM_IO           INIT_VM_FLAG(IO)
  #define VM_SEQ_READ     INIT_VM_FLAG(SEQ_READ)
  #define VM_RAND_READ    INIT_VM_FLAG(RAND_READ)
  #define VM_DONTCOPY     INIT_VM_FLAG(DONTCOPY)
  #define VM_DONTEXPAND   INIT_VM_FLAG(DONTEXPAND)
  #define VM_LOCKONFAULT  INIT_VM_FLAG(LOCKONFAULT)
  #define VM_ACCOUNT      INIT_VM_FLAG(ACCOUNT)
  #define VM_NORESERVE    INIT_VM_FLAG(NORESERVE)
  #define VM_HUGETLB      INIT_VM_FLAG(HUGETLB)
  #define VM_SYNC         INIT_VM_FLAG(SYNC)
  #define VM_ARCH_1       INIT_VM_FLAG(ARCH_1)
  #define VM_WIPEONFORK   INIT_VM_FLAG(WIPEONFORK)
  #define VM_DONTDUMP     INIT_VM_FLAG(DONTDUMP)

  /* Without CONFIG_MEM_SOFT_DIRTY the soft-dirty mask is empty. */
  #ifdef CONFIG_MEM_SOFT_DIRTY
  #define VM_SOFTDIRTY    INIT_VM_FLAG(SOFTDIRTY)
  #else
  #define VM_SOFTDIRTY    VM_NONE
  #endif

  #define VM_MIXEDMAP     INIT_VM_FLAG(MIXEDMAP)
  #define VM_HUGEPAGE     INIT_VM_FLAG(HUGEPAGE)
  #define VM_NOHUGEPAGE   INIT_VM_FLAG(NOHUGEPAGE)
  #define VM_MERGEABLE    INIT_VM_FLAG(MERGEABLE)
  #define VM_STACK        INIT_VM_FLAG(STACK)
  /*
   * VMA_STACK_EARLY_BIT is only declared under CONFIG_STACK_GROWSUP, so the mask
   * must be guarded by that same symbol. The previous spelling,
   * CONFIG_STACK_GROWS_UP, did not match it, which left VM_STACK_EARLY
   * permanently equal to VM_NONE.
   */
  #ifdef CONFIG_STACK_GROWSUP
  #define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
  #else
  #define VM_STACK_EARLY VM_NONE
  #endif
  /* Protection keys: up to five flag bits starting at VMA_HIGH_ARCH_0_BIT. */
  #ifdef CONFIG_ARCH_HAS_PKEYS
  #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
  /* Despite the naming, these are FLAGS not bits. */
  #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
  #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
  #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
  #if CONFIG_ARCH_PKEY_BITS > 3
  #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
  #else
  #define VM_PKEY_BIT3 VM_NONE
  #endif /* CONFIG_ARCH_PKEY_BITS > 3 */
  #if CONFIG_ARCH_PKEY_BITS > 4
  #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
  #else
  #define VM_PKEY_BIT4 VM_NONE
  #endif /* CONFIG_ARCH_PKEY_BITS > 4 */
  #endif /* CONFIG_ARCH_HAS_PKEYS */

  /* Shadow stack (x86) / Guarded Control Stack (arm64). */
  #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
  #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
  #else
  #define VM_SHADOW_STACK VM_NONE
  #endif

  /* Per-architecture masks; at most one branch of this chain is taken. */
  #if defined(CONFIG_PPC64)
  #define VM_SAO INIT_VM_FLAG(SAO)
  #elif defined(CONFIG_PARISC)
  #define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
  #elif defined(CONFIG_SPARC64)
  #define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
  #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
  #elif defined(CONFIG_ARM64)
  #define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
  #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
  #elif !defined(CONFIG_MMU)
  #define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
  #endif

  /* Fall back to an empty mask when VM_GROWSUP was not defined above. */
  #ifndef VM_GROWSUP
  #define VM_GROWSUP VM_NONE
  #endif

  #ifdef CONFIG_ARM64_MTE
  #define VM_MTE INIT_VM_FLAG(MTE)
  #define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
  #else
  #define VM_MTE VM_NONE
  #define VM_MTE_ALLOWED VM_NONE
  #endif

  #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
  #define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
  #else
  #define VM_UFFD_MINOR VM_NONE
  #endif

  /* These two masks are only non-empty under CONFIG_64BIT. */
  #ifdef CONFIG_64BIT
  #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
  #define VM_SEALED INIT_VM_FLAG(SEALED)
  #else
  #define VM_ALLOW_ANY_UNCACHED VM_NONE
  #define VM_SEALED VM_NONE
  #endif

  #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
  #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
  #else
  #define VM_DROPPABLE VM_NONE
  #endif
  /* Bits set in the VMA until the stack is in its final location */
  #define VM_STACK_INCOMPLETE_SETUP \
      (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)

  /* VM_EXEC when the current task's personality has READ_IMPLIES_EXEC, else 0. */
  #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

  /* Common data flag combinations */
  #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
  #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
      VM_MAYWRITE | VM_MAYEXEC)
  #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

  /* Defaults below apply only when no earlier definition exists (arch override). */
  #ifndef VM_DATA_DEFAULT_FLAGS
  #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
  #endif

  #ifndef VM_STACK_DEFAULT_FLAGS
  #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
  #endif

  #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
  #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
  /* VMA basic access permission flags */
  #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)

  /* Special VMAs: these can be neither merged nor mlock()ed. */
  #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

  /* Task size and stack top are all derived from DEFAULT_MAP_WINDOW. */
  #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
  #define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
  #define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
  #define STACK_TOP TASK_SIZE_LOW
  #define STACK_TOP_MAX TASK_SIZE_MAX

  /* This mask represents all the VMA flag bits used by mlock */
  #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
  /*
   * TASK_EXEC and VM_DATA_FLAGS_TSK_EXEC are already defined earlier in this
   * header, next to the other data flag combinations; the verbatim second
   * definitions that used to sit here have been removed.
   */

  /* Resource limit indices. */
  #define RLIMIT_STACK 3 /* max stack size */
  #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */

  #define CAP_IPC_LOCK 14

  #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
  #define VM_IGNORE_MERGE VM_STICKY
  #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
  /* pgprot_t accessors: read the wrapped value / wrap a raw value. */
  #define pgprot_val(x) ((x).pgprot)
  #define __pgprot(x) ((pgprot_t) { (x) } )

  /* Visit VMAs in turn using vma_next() until it returns NULL. */
  #define for_each_vma(__vmi, __vma) \
      while (((__vma) = vma_next(&(__vmi))) != NULL)

  /* The MM code likes to work with exclusive end addresses */
  #define for_each_vma_range(__vmi, __vma, __end) \
      while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)

  #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
  #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))

  /* The bit operations map straight onto their double-underscore variants. */
  #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
  #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)

  #define AS_MM_ALL_LOCKS 2

  /* Exchange the values of a and b through a temporary of a's type. */
  #define swap(a, b) \
      do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
  /*
   * Personality flags used for bug emulation.
   *
   * These occupy the top three bytes.
   */
  enum {
      READ_IMPLIES_EXEC = 0x0400000,
  };
  353. struct vma_iterator {
  354. struct ma_state mas;
  355. };
  356. #define VMA_ITERATOR(name, __mm, __addr) \
  357. struct vma_iterator name = { \
  358. .mas = { \
  359. .tree = &(__mm)->mm_mt, \
  360. .index = __addr, \
  361. .node = NULL, \
  362. .status = ma_start, \
  363. }, \
  364. }
  /* Define an empty-initialized struct mutex named @mutexname. */
  #define DEFINE_MUTEX(mutexname) \
      struct mutex mutexname = {}

  /* Declare an unsigned long array large enough to hold @bits bits. */
  #define DECLARE_BITMAP(name, bits) \
      unsigned long name[BITS_TO_LONGS(bits)]

  /* A vma_flags_t value built from an empty initializer. */
  #define EMPTY_VMA_FLAGS ((vma_flags_t){ })
  /* The follow-up action to take once an .mmap_prepare call has completed. */
  enum mmap_action_type {
      MMAP_NOTHING,       /* Mapping is complete, no further action. */
      MMAP_REMAP_PFN,     /* Remap PFN range. */
      MMAP_IO_REMAP_PFN,  /* I/O remap PFN range. */
  };
  376. /*
  377. * Describes an action an mmap_prepare hook can instruct to be taken to complete
  378. * the mapping of a VMA. Specified in vm_area_desc.
  379. */
  380. struct mmap_action {
  381. union {
  382. /* Remap range. */
  383. struct {
  384. unsigned long start;
  385. unsigned long start_pfn;
  386. unsigned long size;
  387. pgprot_t pgprot;
  388. } remap;
  389. };
  390. enum mmap_action_type type;
  391. /*
  392. * If specified, this hook is invoked after the selected action has been
  393. * successfully completed. Note that the VMA write lock still held.
  394. *
  395. * The absolute minimum ought to be done here.
  396. *
  397. * Returns 0 on success, or an error code.
  398. */
  399. int (*success_hook)(const struct vm_area_struct *vma);
  400. /*
  401. * If specified, this hook is invoked when an error occurred when
  402. * attempting the selection action.
  403. *
  404. * The hook can return an error code in order to filter the error, but
  405. * it is not valid to clear the error here.
  406. */
  407. int (*error_hook)(int err);
  408. /*
  409. * This should be set in rare instances where the operation required
  410. * that the rmap should not be able to access the VMA until
  411. * completely set up.
  412. */
  413. bool hide_from_rmap_until_complete :1;
  414. };
  /* The kinds of operation that modify VMAs. */
  enum vma_operation {
      VMA_OP_SPLIT,
      VMA_OP_MERGE_UNFAULTED,
      VMA_OP_REMAP,
      VMA_OP_FORK,
  };
  422. /*
  423. * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
  424. * manipulate mutable fields which will cause those fields to be updated in the
  425. * resultant VMA.
  426. *
  427. * Helper functions are not required for manipulating any field.
  428. */
  429. struct vm_area_desc {
  430. /* Immutable state. */
  431. const struct mm_struct *const mm;
  432. struct file *const file; /* May vary from vm_file in stacked callers. */
  433. unsigned long start;
  434. unsigned long end;
  435. /* Mutable fields. Populated with initial state. */
  436. pgoff_t pgoff;
  437. struct file *vm_file;
  438. union {
  439. vm_flags_t vm_flags;
  440. vma_flags_t vma_flags;
  441. };
  442. pgprot_t page_prot;
  443. /* Write-only fields. */
  444. const struct vm_operations_struct *vm_ops;
  445. void *private_data;
  446. /* Take further action? */
  447. struct mmap_action action;
  448. };
  449. struct vm_area_struct {
  450. /* The first cache line has the info for VMA tree walking. */
  451. union {
  452. struct {
  453. /* VMA covers [vm_start; vm_end) addresses within mm */
  454. unsigned long vm_start;
  455. unsigned long vm_end;
  456. };
  457. freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
  458. };
  459. struct mm_struct *vm_mm; /* The address space we belong to. */
  460. pgprot_t vm_page_prot; /* Access permissions of this VMA. */
  461. /*
  462. * Flags, see mm.h.
  463. * To modify use vm_flags_{init|reset|set|clear|mod} functions.
  464. */
  465. union {
  466. const vm_flags_t vm_flags;
  467. vma_flags_t flags;
  468. };
  469. #ifdef CONFIG_PER_VMA_LOCK
  470. /*
  471. * Can only be written (using WRITE_ONCE()) while holding both:
  472. * - mmap_lock (in write mode)
  473. * - vm_refcnt bit at VMA_LOCK_OFFSET is set
  474. * Can be read reliably while holding one of:
  475. * - mmap_lock (in read or write mode)
  476. * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
  477. * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
  478. * while holding nothing (except RCU to keep the VMA struct allocated).
  479. *
  480. * This sequence counter is explicitly allowed to overflow; sequence
  481. * counter reuse can only lead to occasional unnecessary use of the
  482. * slowpath.
  483. */
  484. unsigned int vm_lock_seq;
  485. #endif
  486. /*
  487. * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
  488. * list, after a COW of one of the file pages. A MAP_SHARED vma
  489. * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
  490. * or brk vma (with NULL file) can only be in an anon_vma list.
  491. */
  492. struct list_head anon_vma_chain; /* Serialized by mmap_lock &
  493. * page_table_lock */
  494. struct anon_vma *anon_vma; /* Serialized by page_table_lock */
  495. /* Function pointers to deal with this struct. */
  496. const struct vm_operations_struct *vm_ops;
  497. /* Information about our backing store: */
  498. unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
  499. units */
  500. struct file * vm_file; /* File we map to (can be NULL). */
  501. void * vm_private_data; /* was vm_pte (shared mem) */
  502. #ifdef CONFIG_SWAP
  503. atomic_long_t swap_readahead_info;
  504. #endif
  505. #ifndef CONFIG_MMU
  506. struct vm_region *vm_region; /* NOMMU mapping region */
  507. #endif
  508. #ifdef CONFIG_NUMA
  509. struct mempolicy *vm_policy; /* NUMA policy for the VMA */
  510. #endif
  511. #ifdef CONFIG_NUMA_BALANCING
  512. struct vma_numab_state *numab_state; /* NUMA Balancing state */
  513. #endif
  514. #ifdef CONFIG_PER_VMA_LOCK
  515. /* Unstable RCU readers are allowed to read this. */
  516. refcount_t vm_refcnt;
  517. #endif
  518. /*
  519. * For areas with an address space and backing store,
  520. * linkage into the address_space->i_mmap interval tree.
  521. *
  522. */
  523. struct {
  524. struct rb_node rb;
  525. unsigned long rb_subtree_last;
  526. } shared;
  527. #ifdef CONFIG_ANON_VMA_NAME
  528. /*
  529. * For private and shared anonymous mappings, a pointer to a null
  530. * terminated string containing the name given to the vma, or NULL if
  531. * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
  532. */
  533. struct anon_vma_name *anon_name;
  534. #endif
  535. struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
  536. } __randomize_layout;
  537. struct vm_operations_struct {
  538. void (*open)(struct vm_area_struct * area);
  539. /**
  540. * @close: Called when the VMA is being removed from the MM.
  541. * Context: User context. May sleep. Caller holds mmap_lock.
  542. */
  543. void (*close)(struct vm_area_struct * area);
  544. /* Called any time before splitting to check if it's allowed */
  545. int (*may_split)(struct vm_area_struct *area, unsigned long addr);
  546. int (*mremap)(struct vm_area_struct *area);
  547. /*
  548. * Called by mprotect() to make driver-specific permission
  549. * checks before mprotect() is finalised. The VMA must not
  550. * be modified. Returns 0 if mprotect() can proceed.
  551. */
  552. int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
  553. unsigned long end, unsigned long newflags);
  554. vm_fault_t (*fault)(struct vm_fault *vmf);
  555. vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
  556. vm_fault_t (*map_pages)(struct vm_fault *vmf,
  557. pgoff_t start_pgoff, pgoff_t end_pgoff);
  558. unsigned long (*pagesize)(struct vm_area_struct * area);
  559. /* notification that a previously read-only page is about to become
  560. * writable, if an error is returned it will cause a SIGBUS */
  561. vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
  562. /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
  563. vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
  564. /* called by access_process_vm when get_user_pages() fails, typically
  565. * for use by special VMAs. See also generic_access_phys() for a generic
  566. * implementation useful for any iomem mapping.
  567. */
  568. int (*access)(struct vm_area_struct *vma, unsigned long addr,
  569. void *buf, int len, int write);
  570. /* Called by the /proc/PID/maps code to ask the vma whether it
  571. * has a special name. Returning non-NULL will also cause this
  572. * vma to be dumped unconditionally. */
  573. const char *(*name)(struct vm_area_struct *vma);
  574. #ifdef CONFIG_NUMA
  575. /*
  576. * set_policy() op must add a reference to any non-NULL @new mempolicy
  577. * to hold the policy upon return. Caller should pass NULL @new to
  578. * remove a policy and fall back to surrounding context--i.e. do not
  579. * install a MPOL_DEFAULT policy, nor the task or system default
  580. * mempolicy.
  581. */
  582. int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
  583. /*
  584. * get_policy() op must add reference [mpol_get()] to any policy at
  585. * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
  586. * in mm/mempolicy.c will do this automatically.
  587. * get_policy() must NOT add a ref if the policy at (vma,addr) is not
  588. * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
  589. * If no [shared/vma] mempolicy exists at the addr, get_policy() op
  590. * must return NULL--i.e., do not "fallback" to task or system default
  591. * policy.
  592. */
  593. struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
  594. unsigned long addr, pgoff_t *ilx);
  595. #endif
  596. #ifdef CONFIG_FIND_NORMAL_PAGE
  597. /*
  598. * Called by vm_normal_page() for special PTEs in @vma at @addr. This
  599. * allows for returning a "normal" page from vm_normal_page() even
  600. * though the PTE indicates that the "struct page" either does not exist
  601. * or should not be touched: "special".
  602. *
  603. * Do not add new users: this really only works when a "normal" page
  604. * was mapped, but then the PTE got changed to something weird (+
  605. * marked special) that would not make pte_pfn() identify the originally
  606. * inserted page.
  607. */
  608. struct page *(*find_normal_page)(struct vm_area_struct *vma,
  609. unsigned long addr);
  610. #endif /* CONFIG_FIND_NORMAL_PAGE */
  611. };
  /* Value for vm_unmapped_area_info.flags. */
  #define VM_UNMAPPED_AREA_TOPDOWN 1

  /* Parameters describing a search for an unmapped area. */
  struct vm_unmapped_area_info {
      unsigned long flags;
      unsigned long length;
      unsigned long low_limit;
      unsigned long high_limit;
      unsigned long align_mask;
      unsigned long align_offset;
      unsigned long start_gap;
  };
  /* State describing a page table move from one VMA to another. */
  struct pagetable_move_control {
      struct vm_area_struct *old;  /* Source VMA. */
      struct vm_area_struct *new;  /* Destination VMA. */
      unsigned long old_addr;      /* Address from which the move begins. */
      unsigned long old_end;       /* Exclusive address at which old range ends. */
      unsigned long new_addr;      /* Address to move page tables to. */
      unsigned long len_in;        /* Bytes to remap specified by user. */

      bool need_rmap_locks;        /* Do rmap locks need to be taken? */
      bool for_stack;              /* Is this an early temp stack being moved? */
  };

  /*
   * Define a pagetable_move_control called @name. old_end is computed as
   * old_addr_ + len_; members not listed here are left zero-initialized.
   */
  #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
      struct pagetable_move_control name = { \
          .old = old_, \
          .new = new_, \
          .old_addr = old_addr_, \
          .old_end = (old_addr_) + (len_), \
          .new_addr = new_addr_, \
          .len_in = len_, \
      }
  641. static inline void vma_iter_invalidate(struct vma_iterator *vmi)
  642. {
  643. mas_pause(&vmi->mas);
  644. }
  645. static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
  646. {
  647. return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
  648. }
  649. static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
  650. {
  651. return __pgprot(vm_flags);
  652. }
  653. static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
  654. {
  655. return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
  656. }
  657. /*
  658. * Copy value to the first system word of VMA flags, non-atomically.
  659. *
  660. * IMPORTANT: This does not overwrite bytes past the first system word. The
  661. * caller must account for this.
  662. */
  663. static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
  664. {
  665. *ACCESS_PRIVATE(flags, __vma_flags) = value;
  666. }
  667. /*
  668. * Copy value to the first system word of VMA flags ONCE, non-atomically.
  669. *
  670. * IMPORTANT: This does not overwrite bytes past the first system word. The
  671. * caller must account for this.
  672. */
  673. static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
  674. {
  675. unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
  676. WRITE_ONCE(*bitmap, value);
  677. }
  678. /* Update the first system word of VMA flags setting bits, non-atomically. */
  679. static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
  680. {
  681. unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
  682. *bitmap |= value;
  683. }
  684. /* Update the first system word of VMA flags clearing bits, non-atomically. */
  685. static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
  686. {
  687. unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
  688. *bitmap &= ~value;
  689. }
  690. static inline void vma_flags_clear_all(vma_flags_t *flags)
  691. {
  692. bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
  693. }
  694. static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
  695. {
  696. unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
  697. __set_bit((__force int)bit, bitmap);
  698. }
  699. /* Use when VMA is not part of the VMA tree and needs no locking */
  700. static inline void vm_flags_init(struct vm_area_struct *vma,
  701. vm_flags_t flags)
  702. {
  703. vma_flags_clear_all(&vma->flags);
  704. vma_flags_overwrite_word(&vma->flags, flags);
  705. }
  706. /*
  707. * Use when VMA is part of the VMA tree and modifications need coordination
  708. * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
  709. * it should be locked explicitly beforehand.
  710. */
  711. static inline void vm_flags_reset(struct vm_area_struct *vma,
  712. vm_flags_t flags)
  713. {
  714. vma_assert_write_locked(vma);
  715. vm_flags_init(vma, flags);
  716. }
  717. static inline void vm_flags_reset_once(struct vm_area_struct *vma,
  718. vm_flags_t flags)
  719. {
  720. vma_assert_write_locked(vma);
  721. /*
  722. * The user should only be interested in avoiding reordering of
  723. * assignment to the first word.
  724. */
  725. vma_flags_clear_all(&vma->flags);
  726. vma_flags_overwrite_word_once(&vma->flags, flags);
  727. }
  728. static inline void vm_flags_set(struct vm_area_struct *vma,
  729. vm_flags_t flags)
  730. {
  731. vma_start_write(vma);
  732. vma_flags_set_word(&vma->flags, flags);
  733. }
  734. static inline void vm_flags_clear(struct vm_area_struct *vma,
  735. vm_flags_t flags)
  736. {
  737. vma_start_write(vma);
  738. vma_flags_clear_word(&vma->flags, flags);
  739. }
  740. static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
  741. #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
  742. (const vma_flag_t []){__VA_ARGS__})
  743. static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
  744. vma_flags_t to_test)
  745. {
  746. const unsigned long *bitmap = flags->__vma_flags;
  747. const unsigned long *bitmap_to_test = to_test.__vma_flags;
  748. return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
  749. }
  750. #define vma_flags_test(flags, ...) \
  751. vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
  752. static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
  753. vma_flags_t to_test)
  754. {
  755. const unsigned long *bitmap = flags->__vma_flags;
  756. const unsigned long *bitmap_to_test = to_test.__vma_flags;
  757. return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
  758. }
  759. #define vma_flags_test_all(flags, ...) \
  760. vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
  761. static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
  762. {
  763. unsigned long *bitmap = flags->__vma_flags;
  764. const unsigned long *bitmap_to_set = to_set.__vma_flags;
  765. bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
  766. }
  767. #define vma_flags_set(flags, ...) \
  768. vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
  769. static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
  770. {
  771. unsigned long *bitmap = flags->__vma_flags;
  772. const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
  773. bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
  774. }
  775. #define vma_flags_clear(flags, ...) \
  776. vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
  777. static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
  778. vma_flags_t flags)
  779. {
  780. return vma_flags_test_all_mask(&vma->flags, flags);
  781. }
  782. #define vma_test_all_flags(vma, ...) \
  783. vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
  784. static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
  785. {
  786. return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
  787. (VM_SHARED | VM_MAYWRITE);
  788. }
  789. static inline void vma_set_flags_mask(struct vm_area_struct *vma,
  790. vma_flags_t flags)
  791. {
  792. vma_flags_set_mask(&vma->flags, flags);
  793. }
  794. #define vma_set_flags(vma, ...) \
  795. vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
  796. static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
  797. vma_flags_t flags)
  798. {
  799. return vma_flags_test_mask(&desc->vma_flags, flags);
  800. }
  801. #define vma_desc_test_flags(desc, ...) \
  802. vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
  803. static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
  804. vma_flags_t flags)
  805. {
  806. vma_flags_set_mask(&desc->vma_flags, flags);
  807. }
  808. #define vma_desc_set_flags(desc, ...) \
  809. vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
  810. static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
  811. vma_flags_t flags)
  812. {
  813. vma_flags_clear_mask(&desc->vma_flags, flags);
  814. }
  815. #define vma_desc_clear_flags(desc, ...) \
  816. vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
  817. static inline bool is_shared_maywrite(const vma_flags_t *flags)
  818. {
  819. return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
  820. }
  821. static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
  822. {
  823. return is_shared_maywrite(&vma->flags);
  824. }
  825. static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
  826. {
  827. /*
  828. * Uses mas_find() to get the first VMA when the iterator starts.
  829. * Calling mas_next() could skip the first entry.
  830. */
  831. return mas_find(&vmi->mas, ULONG_MAX);
  832. }
  833. /*
  834. * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
  835. * assertions should be made either under mmap_write_lock or when the object
  836. * has been isolated under mmap_write_lock, ensuring no competing writers.
  837. */
  838. static inline void vma_assert_attached(struct vm_area_struct *vma)
  839. {
  840. WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
  841. }
  842. static inline void vma_assert_detached(struct vm_area_struct *vma)
  843. {
  844. WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
  845. }
  846. static inline void vma_assert_write_locked(struct vm_area_struct *);
  847. static inline void vma_mark_attached(struct vm_area_struct *vma)
  848. {
  849. vma_assert_write_locked(vma);
  850. vma_assert_detached(vma);
  851. refcount_set_release(&vma->vm_refcnt, 1);
  852. }
  853. static inline void vma_mark_detached(struct vm_area_struct *vma)
  854. {
  855. vma_assert_write_locked(vma);
  856. vma_assert_attached(vma);
  857. /* We are the only writer, so no need to use vma_refcount_put(). */
  858. if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
  859. /*
  860. * Reader must have temporarily raised vm_refcnt but it will
  861. * drop it without using the vma since vma is write-locked.
  862. */
  863. }
  864. }
  865. static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
  866. {
  867. memset(vma, 0, sizeof(*vma));
  868. vma->vm_mm = mm;
  869. vma->vm_ops = &vma_dummy_vm_ops;
  870. INIT_LIST_HEAD(&vma->anon_vma_chain);
  871. vma->vm_lock_seq = UINT_MAX;
  872. }
  873. /*
  874. * These are defined in vma.h, but sadly vm_stat_account() is referenced by
  875. * kernel/fork.c, so we have to these broadly available there, and temporarily
  876. * define them here to resolve the dependency cycle.
  877. */
  878. #define is_exec_mapping(flags) \
  879. ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
  880. #define is_stack_mapping(flags) \
  881. (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
  882. #define is_data_mapping(flags) \
  883. ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
  884. static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
  885. long npages)
  886. {
  887. WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
  888. if (is_exec_mapping(flags))
  889. mm->exec_vm += npages;
  890. else if (is_stack_mapping(flags))
  891. mm->stack_vm += npages;
  892. else if (is_data_mapping(flags))
  893. mm->data_vm += npages;
  894. }
  895. #undef is_exec_mapping
  896. #undef is_stack_mapping
  897. #undef is_data_mapping
  /* Undo accounting for @pages by passing the negated count to vm_acct_memory(). */
  static inline void vm_unacct_memory(long pages)
  {
      const long delta = -pages;

      vm_acct_memory(delta);
  }
  902. static inline void mapping_allow_writable(struct address_space *mapping)
  903. {
  904. atomic_inc(&mapping->i_mmap_writable);
  905. }
  906. static inline
  907. struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
  908. {
  909. return mas_find(&vmi->mas, max - 1);
  910. }
  911. static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
  912. unsigned long start, unsigned long end, gfp_t gfp)
  913. {
  914. __mas_set_range(&vmi->mas, start, end - 1);
  915. mas_store_gfp(&vmi->mas, NULL, gfp);
  916. if (unlikely(mas_is_err(&vmi->mas)))
  917. return -ENOMEM;
  918. return 0;
  919. }
  920. static inline void vma_set_anonymous(struct vm_area_struct *vma)
  921. {
  922. vma->vm_ops = NULL;
  923. }
  924. /* Declared in vma.h. */
  925. static inline void set_vma_from_desc(struct vm_area_struct *vma,
  926. struct vm_area_desc *desc);
  927. static inline int __compat_vma_mmap(const struct file_operations *f_op,
  928. struct file *file, struct vm_area_struct *vma)
  929. {
  930. struct vm_area_desc desc = {
  931. .mm = vma->vm_mm,
  932. .file = file,
  933. .start = vma->vm_start,
  934. .end = vma->vm_end,
  935. .pgoff = vma->vm_pgoff,
  936. .vm_file = vma->vm_file,
  937. .vm_flags = vma->vm_flags,
  938. .page_prot = vma->vm_page_prot,
  939. .action.type = MMAP_NOTHING, /* Default */
  940. };
  941. int err;
  942. err = f_op->mmap_prepare(&desc);
  943. if (err)
  944. return err;
  945. mmap_action_prepare(&desc.action, &desc);
  946. set_vma_from_desc(vma, &desc);
  947. return mmap_action_complete(&desc.action, vma);
  948. }
  949. static inline int compat_vma_mmap(struct file *file,
  950. struct vm_area_struct *vma)
  951. {
  952. return __compat_vma_mmap(file->f_op, file, vma);
  953. }
  954. static inline void vma_iter_init(struct vma_iterator *vmi,
  955. struct mm_struct *mm, unsigned long addr)
  956. {
  957. mas_init(&vmi->mas, &mm->mm_mt, addr);
  958. }
  959. static inline unsigned long vma_pages(struct vm_area_struct *vma)
  960. {
  961. return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
  962. }
  963. static inline void mmap_assert_locked(struct mm_struct *);
  964. static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
  965. unsigned long start_addr,
  966. unsigned long end_addr)
  967. {
  968. unsigned long index = start_addr;
  969. mmap_assert_locked(mm);
  970. return mt_find(&mm->mm_mt, &index, end_addr - 1);
  971. }
  972. static inline
  973. struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
  974. {
  975. return mtree_load(&mm->mm_mt, addr);
  976. }
  977. static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
  978. {
  979. return mas_prev(&vmi->mas, 0);
  980. }
  981. static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
  982. {
  983. mas_set(&vmi->mas, addr);
  984. }
  985. static inline bool vma_is_anonymous(struct vm_area_struct *vma)
  986. {
  987. return !vma->vm_ops;
  988. }
  989. /* Defined in vma.h, so temporarily define here to avoid circular dependency. */
  990. #define vma_iter_load(vmi) \
  991. mas_walk(&(vmi)->mas)
  992. static inline struct vm_area_struct *
  993. find_vma_prev(struct mm_struct *mm, unsigned long addr,
  994. struct vm_area_struct **pprev)
  995. {
  996. struct vm_area_struct *vma;
  997. VMA_ITERATOR(vmi, mm, addr);
  998. vma = vma_iter_load(&vmi);
  999. *pprev = vma_prev(&vmi);
  1000. if (!vma)
  1001. vma = vma_next(&vmi);
  1002. return vma;
  1003. }
  1004. #undef vma_iter_load
  1005. static inline void vma_iter_free(struct vma_iterator *vmi)
  1006. {
  1007. mas_destroy(&vmi->mas);
  1008. }
  1009. static inline
  1010. struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
  1011. {
  1012. return mas_next_range(&vmi->mas, ULONG_MAX);
  1013. }
  1014. bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
  1015. /* Update vma->vm_page_prot to reflect vma->vm_flags. */
  1016. static inline void vma_set_page_prot(struct vm_area_struct *vma)
  1017. {
  1018. vm_flags_t vm_flags = vma->vm_flags;
  1019. pgprot_t vm_page_prot;
  1020. /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
  1021. vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
  1022. if (vma_wants_writenotify(vma, vm_page_prot)) {
  1023. vm_flags &= ~VM_SHARED;
  1024. /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
  1025. vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
  1026. }
  1027. /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
  1028. WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
  1029. }
  1030. static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
  1031. {
  1032. if (vma->vm_flags & VM_GROWSDOWN)
  1033. return stack_guard_gap;
  1034. /* See reasoning around the VM_SHADOW_STACK definition */
  1035. if (vma->vm_flags & VM_SHADOW_STACK)
  1036. return PAGE_SIZE;
  1037. return 0;
  1038. }
  1039. static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
  1040. {
  1041. unsigned long gap = stack_guard_start_gap(vma);
  1042. unsigned long vm_start = vma->vm_start;
  1043. vm_start -= gap;
  1044. if (vm_start > vma->vm_start)
  1045. vm_start = 0;
  1046. return vm_start;
  1047. }
  1048. static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
  1049. {
  1050. unsigned long vm_end = vma->vm_end;
  1051. if (vma->vm_flags & VM_GROWSUP) {
  1052. vm_end += stack_guard_gap;
  1053. if (vm_end < vma->vm_end)
  1054. vm_end = -PAGE_SIZE;
  1055. }
  1056. return vm_end;
  1057. }
  1058. static inline bool vma_is_accessible(struct vm_area_struct *vma)
  1059. {
  1060. return vma->vm_flags & VM_ACCESS_FLAGS;
  1061. }
  1062. static inline bool mlock_future_ok(const struct mm_struct *mm,
  1063. vm_flags_t vm_flags, unsigned long bytes)
  1064. {
  1065. unsigned long locked_pages, limit_pages;
  1066. if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
  1067. return true;
  1068. locked_pages = bytes >> PAGE_SHIFT;
  1069. locked_pages += mm->locked_vm;
  1070. limit_pages = rlimit(RLIMIT_MEMLOCK);
  1071. limit_pages >>= PAGE_SHIFT;
  1072. return locked_pages <= limit_pages;
  1073. }
  1074. static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
  1075. {
  1076. /* If MDWE is disabled, we have nothing to deny. */
  1077. if (mm_flags_test(MMF_HAS_MDWE, current->mm))
  1078. return false;
  1079. /* If the new VMA is not executable, we have nothing to deny. */
  1080. if (!(new & VM_EXEC))
  1081. return false;
  1082. /* Under MDWE we do not accept newly writably executable VMAs... */
  1083. if (new & VM_WRITE)
  1084. return true;
  1085. /* ...nor previously non-executable VMAs becoming executable. */
  1086. if (!(old & VM_EXEC))
  1087. return true;
  1088. return false;
  1089. }
  1090. static inline int mapping_map_writable(struct address_space *mapping)
  1091. {
  1092. return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
  1093. 0 : -EPERM;
  1094. }
  1095. /* Did the driver provide valid mmap hook configuration? */
  1096. static inline bool can_mmap_file(struct file *file)
  1097. {
  1098. bool has_mmap = file->f_op->mmap;
  1099. bool has_mmap_prepare = file->f_op->mmap_prepare;
  1100. /* Hooks are mutually exclusive. */
  1101. if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
  1102. return false;
  1103. if (!has_mmap && !has_mmap_prepare)
  1104. return false;
  1105. return true;
  1106. }
  1107. static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
  1108. {
  1109. if (file->f_op->mmap_prepare)
  1110. return compat_vma_mmap(file, vma);
  1111. return file->f_op->mmap(file, vma);
  1112. }
  1113. static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
  1114. {
  1115. return file->f_op->mmap_prepare(desc);
  1116. }
  1117. static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
  1118. {
  1119. /* Changing an anonymous vma with this is illegal */
  1120. get_file(file);
  1121. swap(vma->vm_file, file);
  1122. fput(file);
  1123. }