| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Page table allocation functions
- *
- * Copyright IBM Corp. 2016
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
- #include <linux/sysctl.h>
- #include <linux/slab.h>
- #include <linux/mm.h>
- #include <asm/mmu_context.h>
- #include <asm/page-states.h>
- #include <asm/pgalloc.h>
- #include <asm/tlbflush.h>
- unsigned long *crst_table_alloc_noprof(struct mm_struct *mm)
- {
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
- struct ptdesc *ptdesc;
- unsigned long *table;
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- ptdesc = pagetable_alloc_noprof(gfp, CRST_ALLOC_ORDER);
- if (!ptdesc)
- return NULL;
- table = ptdesc_address(ptdesc);
- __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
- return table;
- }
/*
 * Release a CRST table. A NULL @table is ignored; @mm is not used.
 */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	if (table)
		pagetable_free(virt_to_ptdesc(table));
}
- static void __crst_table_upgrade(void *arg)
- {
- struct mm_struct *mm = arg;
- struct ctlreg asce;
- /* change all active ASCEs to avoid the creation of new TLBs */
- if (current->active_mm == mm) {
- asce.val = mm->context.asce;
- get_lowcore()->user_asce = asce;
- local_ctl_load(7, &asce);
- if (!test_thread_flag(TIF_ASCE_PRIMARY))
- local_ctl_load(1, &asce);
- }
- __tlb_flush_local();
- }
- int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
- {
- unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
- unsigned long asce_limit = mm->context.asce_limit;
- mmap_assert_write_locked(mm);
- /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- VM_BUG_ON(asce_limit < _REGION2_SIZE);
- if (end <= asce_limit)
- return 0;
- if (asce_limit == _REGION2_SIZE) {
- p4d = crst_table_alloc(mm);
- if (unlikely(!p4d))
- goto err_p4d;
- crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
- pagetable_p4d_ctor(virt_to_ptdesc(p4d));
- }
- if (end > _REGION1_SIZE) {
- pgd = crst_table_alloc(mm);
- if (unlikely(!pgd))
- goto err_pgd;
- crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
- pagetable_pgd_ctor(virt_to_ptdesc(pgd));
- }
- spin_lock_bh(&mm->page_table_lock);
- if (p4d) {
- __pgd = (unsigned long *) mm->pgd;
- p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
- mm->pgd = (pgd_t *) p4d;
- mm->context.asce_limit = _REGION1_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm_inc_nr_puds(mm);
- }
- if (pgd) {
- __pgd = (unsigned long *) mm->pgd;
- pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
- mm->pgd = (pgd_t *) pgd;
- mm->context.asce_limit = TASK_SIZE_MAX;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
- }
- spin_unlock_bh(&mm->page_table_lock);
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
- err_pgd:
- pagetable_dtor(virt_to_ptdesc(p4d));
- crst_table_free(mm, p4d);
- err_p4d:
- return -ENOMEM;
- }
- unsigned long *page_table_alloc_noprof(struct mm_struct *mm)
- {
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
- struct ptdesc *ptdesc;
- unsigned long *table;
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- ptdesc = pagetable_alloc_noprof(gfp, 0);
- if (!ptdesc)
- return NULL;
- if (!pagetable_pte_ctor(mm, ptdesc)) {
- pagetable_free(ptdesc);
- return NULL;
- }
- table = ptdesc_address(ptdesc);
- __arch_set_page_dat(table, 1);
- memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
- memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
- return table;
- }
/*
 * Free a page table. Tables whose ptdesc is marked reserved are released
 * through free_reserved_ptdesc(); all others go through
 * pagetable_dtor_free(). @mm is not used.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(table);

	if (pagetable_is_reserved(ptdesc))
		free_reserved_ptdesc(ptdesc);
	else
		pagetable_dtor_free(ptdesc);
}
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
- static void pte_free_now(struct rcu_head *head)
- {
- struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- pagetable_dtor_free(ptdesc);
- }
- void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
- {
- struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- }
- #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- /*
- * Base infrastructure required to generate basic asces, region, segment,
- * and page tables that do not make use of enhanced features like EDAT1.
- */
- static struct kmem_cache *base_pgt_cache;
- static unsigned long *base_pgt_alloc(void)
- {
- unsigned long *table;
- table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
- if (table)
- memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
- return table;
- }
- static void base_pgt_free(unsigned long *table)
- {
- kmem_cache_free(base_pgt_cache, table);
- }
- static unsigned long *base_crst_alloc(unsigned long val)
- {
- unsigned long *table;
- struct ptdesc *ptdesc;
- ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
- if (!ptdesc)
- return NULL;
- table = ptdesc_address(ptdesc);
- crst_table_init(table, val);
- return table;
- }
/* Free a table obtained from base_crst_alloc(); NULL is ignored. */
static void base_crst_free(unsigned long *table)
{
	if (table)
		pagetable_free(virt_to_ptdesc(table));
}
- #define BASE_ADDR_END_FUNC(NAME, SIZE) \
- static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
- unsigned long end) \
- { \
- unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \
- \
- return (next - 1) < (end - 1) ? next : end; \
- }
- BASE_ADDR_END_FUNC(page, PAGE_SIZE)
- BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
- BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
- BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
- BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
- static inline unsigned long base_lra(unsigned long address)
- {
- unsigned long real;
- asm volatile(
- " lra %0,0(%1)"
- : "=d" (real) : "a" (address) : "cc");
- return real;
- }
- static int base_page_walk(unsigned long *origin, unsigned long addr,
- unsigned long end, int alloc)
- {
- unsigned long *pte, next;
- if (!alloc)
- return 0;
- pte = origin;
- pte += (addr & _PAGE_INDEX) >> PAGE_SHIFT;
- do {
- next = base_page_addr_end(addr, end);
- *pte = base_lra(addr);
- } while (pte++, addr = next, addr < end);
- return 0;
- }
- static int base_segment_walk(unsigned long *origin, unsigned long addr,
- unsigned long end, int alloc)
- {
- unsigned long *ste, next, *table;
- int rc;
- ste = origin;
- ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
- do {
- next = base_segment_addr_end(addr, end);
- if (*ste & _SEGMENT_ENTRY_INVALID) {
- if (!alloc)
- continue;
- table = base_pgt_alloc();
- if (!table)
- return -ENOMEM;
- *ste = __pa(table) | _SEGMENT_ENTRY;
- }
- table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
- rc = base_page_walk(table, addr, next, alloc);
- if (rc)
- return rc;
- if (!alloc)
- base_pgt_free(table);
- cond_resched();
- } while (ste++, addr = next, addr < end);
- return 0;
- }
- static int base_region3_walk(unsigned long *origin, unsigned long addr,
- unsigned long end, int alloc)
- {
- unsigned long *rtte, next, *table;
- int rc;
- rtte = origin;
- rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
- do {
- next = base_region3_addr_end(addr, end);
- if (*rtte & _REGION_ENTRY_INVALID) {
- if (!alloc)
- continue;
- table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!table)
- return -ENOMEM;
- *rtte = __pa(table) | _REGION3_ENTRY;
- }
- table = __va(*rtte & _REGION_ENTRY_ORIGIN);
- rc = base_segment_walk(table, addr, next, alloc);
- if (rc)
- return rc;
- if (!alloc)
- base_crst_free(table);
- } while (rtte++, addr = next, addr < end);
- return 0;
- }
- static int base_region2_walk(unsigned long *origin, unsigned long addr,
- unsigned long end, int alloc)
- {
- unsigned long *rste, next, *table;
- int rc;
- rste = origin;
- rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
- do {
- next = base_region2_addr_end(addr, end);
- if (*rste & _REGION_ENTRY_INVALID) {
- if (!alloc)
- continue;
- table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!table)
- return -ENOMEM;
- *rste = __pa(table) | _REGION2_ENTRY;
- }
- table = __va(*rste & _REGION_ENTRY_ORIGIN);
- rc = base_region3_walk(table, addr, next, alloc);
- if (rc)
- return rc;
- if (!alloc)
- base_crst_free(table);
- } while (rste++, addr = next, addr < end);
- return 0;
- }
- static int base_region1_walk(unsigned long *origin, unsigned long addr,
- unsigned long end, int alloc)
- {
- unsigned long *rfte, next, *table;
- int rc;
- rfte = origin;
- rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
- do {
- next = base_region1_addr_end(addr, end);
- if (*rfte & _REGION_ENTRY_INVALID) {
- if (!alloc)
- continue;
- table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!table)
- return -ENOMEM;
- *rfte = __pa(table) | _REGION1_ENTRY;
- }
- table = __va(*rfte & _REGION_ENTRY_ORIGIN);
- rc = base_region2_walk(table, addr, next, alloc);
- if (rc)
- return rc;
- if (!alloc)
- base_crst_free(table);
- } while (rfte++, addr = next, addr < end);
- return 0;
- }
- /**
- * base_asce_free - free asce and tables returned from base_asce_alloc()
- * @asce: asce to be freed
- *
- * Frees all region, segment, and page tables that were allocated with a
- * corresponding base_asce_alloc() call.
- */
- void base_asce_free(unsigned long asce)
- {
- unsigned long *table = __va(asce & _ASCE_ORIGIN);
- if (!asce)
- return;
- switch (asce & _ASCE_TYPE_MASK) {
- case _ASCE_TYPE_SEGMENT:
- base_segment_walk(table, 0, _REGION3_SIZE, 0);
- break;
- case _ASCE_TYPE_REGION3:
- base_region3_walk(table, 0, _REGION2_SIZE, 0);
- break;
- case _ASCE_TYPE_REGION2:
- base_region2_walk(table, 0, _REGION1_SIZE, 0);
- break;
- case _ASCE_TYPE_REGION1:
- base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
- break;
- }
- base_crst_free(table);
- }
- static int base_pgt_cache_init(void)
- {
- static DEFINE_MUTEX(base_pgt_cache_mutex);
- unsigned long sz = _PAGE_TABLE_SIZE;
- if (base_pgt_cache)
- return 0;
- mutex_lock(&base_pgt_cache_mutex);
- if (!base_pgt_cache)
- base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
- mutex_unlock(&base_pgt_cache_mutex);
- return base_pgt_cache ? 0 : -ENOMEM;
- }
- /**
- * base_asce_alloc - create kernel mapping without enhanced DAT features
- * @addr: virtual start address of kernel mapping
- * @num_pages: number of consecutive pages
- *
- * Generate an asce, including all required region, segment and page tables,
- * that can be used to access the virtual kernel mapping. The difference is
- * that the returned asce does not make use of any enhanced DAT features like
- * e.g. large pages. This is required for some I/O functions that pass an
- * asce, like e.g. some service call requests.
- *
- * Note: the returned asce may NEVER be attached to any cpu. It may only be
- * used for I/O requests. tlb entries that might result because the
- * asce was attached to a cpu won't be cleared.
- */
- unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
- {
- unsigned long asce, *table, end;
- int rc;
- if (base_pgt_cache_init())
- return 0;
- end = addr + num_pages * PAGE_SIZE;
- if (end <= _REGION3_SIZE) {
- table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!table)
- return 0;
- rc = base_segment_walk(table, addr, end, 1);
- asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
- } else if (end <= _REGION2_SIZE) {
- table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!table)
- return 0;
- rc = base_region3_walk(table, addr, end, 1);
- asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- } else if (end <= _REGION1_SIZE) {
- table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!table)
- return 0;
- rc = base_region2_walk(table, addr, end, 1);
- asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- } else {
- table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
- if (!table)
- return 0;
- rc = base_region1_walk(table, addr, end, 1);
- asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
- }
- if (rc) {
- base_asce_free(asce);
- asce = 0;
- }
- return asce;
- }
|