| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636 |
- /* SPDX-License-Identifier: GPL-2.0-only */
- /*
- * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
- *
- * Iterators for Generic Page Table
- */
- #ifndef __GENERIC_PT_PT_ITER_H
- #define __GENERIC_PT_PT_ITER_H
- #include "pt_common.h"
- #include <linux/errno.h>
- /*
- * Use to mangle symbols so that backtraces and the symbol table are
- * understandable. Any non-inlined function should get mangled like this.
- */
- #define NS(fn) CONCATENATE(PTPFX, fn)
- /**
- * pt_check_range() - Validate the range can be iterated
- * @range: Range to validate
- *
- * Check that VA and last_va fall within the permitted range of VAs. If the
- * format is using PT_FEAT_SIGN_EXTEND then this also checks the sign extension
- * is correct.
- */
- static inline int pt_check_range(struct pt_range *range)
- {
- pt_vaddr_t prefix;
- PT_WARN_ON(!range->max_vasz_lg2);
- if (pt_feature(range->common, PT_FEAT_SIGN_EXTEND)) {
- PT_WARN_ON(range->common->max_vasz_lg2 != range->max_vasz_lg2);
- prefix = fvalog2_div(range->va, range->max_vasz_lg2 - 1) ?
- PT_VADDR_MAX :
- 0;
- } else {
- prefix = pt_full_va_prefix(range->common);
- }
- if (!fvalog2_div_eq(range->va, prefix, range->max_vasz_lg2) ||
- !fvalog2_div_eq(range->last_va, prefix, range->max_vasz_lg2))
- return -ERANGE;
- return 0;
- }
- /**
- * pt_index_to_va() - Update range->va to the current pts->index
- * @pts: Iteration State
- *
- * Adjust range->va to match the current index. This is done in a lazy manner
- * since computing the VA takes several instructions and is rarely required.
- */
- static inline void pt_index_to_va(struct pt_state *pts)
- {
- pt_vaddr_t lower_va;
- lower_va = log2_mul(pts->index, pt_table_item_lg2sz(pts));
- pts->range->va = fvalog2_set_mod(pts->range->va, lower_va,
- pt_table_oa_lg2sz(pts));
- }
- /*
- * Add index_count_lg2 number of entries to pts's VA and index. The VA will be
- * adjusted to the end of the contiguous block if it is currently in the middle.
- */
- static inline void _pt_advance(struct pt_state *pts,
- unsigned int index_count_lg2)
- {
- pts->index = log2_set_mod(pts->index + log2_to_int(index_count_lg2), 0,
- index_count_lg2);
- }
- /**
- * pt_entry_fully_covered() - Check if the item or entry is entirely contained
- * within pts->range
- * @pts: Iteration State
- * @oasz_lg2: The size of the item to check, pt_table_item_lg2sz() or
- * pt_entry_oa_lg2sz()
- *
- * Returns: true if the item is fully enclosed by the pts->range.
- */
- static inline bool pt_entry_fully_covered(const struct pt_state *pts,
- unsigned int oasz_lg2)
- {
- struct pt_range *range = pts->range;
- /* Range begins at the start of the entry */
- if (log2_mod(pts->range->va, oasz_lg2))
- return false;
- /* Range ends past the end of the entry */
- if (!log2_div_eq(range->va, range->last_va, oasz_lg2))
- return true;
- /* Range ends at the end of the entry */
- return log2_mod_eq_max(range->last_va, oasz_lg2);
- }
- /**
- * pt_range_to_index() - Starting index for an iteration
- * @pts: Iteration State
- *
- * Return: the starting index for the iteration in pts.
- */
- static inline unsigned int pt_range_to_index(const struct pt_state *pts)
- {
- unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
- PT_WARN_ON(pts->level > pts->range->top_level);
- if (pts->range->top_level == pts->level)
- return log2_div(fvalog2_mod(pts->range->va,
- pts->range->max_vasz_lg2),
- isz_lg2);
- return log2_mod(log2_div(pts->range->va, isz_lg2),
- pt_num_items_lg2(pts));
- }
- /**
- * pt_range_to_end_index() - Ending index iteration
- * @pts: Iteration State
- *
- * Return: the last index for the iteration in pts.
- */
- static inline unsigned int pt_range_to_end_index(const struct pt_state *pts)
- {
- unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
- struct pt_range *range = pts->range;
- unsigned int num_entries_lg2;
- if (range->va == range->last_va)
- return pts->index + 1;
- if (pts->range->top_level == pts->level)
- return log2_div(fvalog2_mod(pts->range->last_va,
- pts->range->max_vasz_lg2),
- isz_lg2) +
- 1;
- num_entries_lg2 = pt_num_items_lg2(pts);
- /* last_va falls within this table */
- if (log2_div_eq(range->va, range->last_va, num_entries_lg2 + isz_lg2))
- return log2_mod(log2_div(pts->range->last_va, isz_lg2),
- num_entries_lg2) +
- 1;
- return log2_to_int(num_entries_lg2);
- }
- static inline void _pt_iter_first(struct pt_state *pts)
- {
- pts->index = pt_range_to_index(pts);
- pts->end_index = pt_range_to_end_index(pts);
- PT_WARN_ON(pts->index > pts->end_index);
- }
- static inline bool _pt_iter_load(struct pt_state *pts)
- {
- if (pts->index >= pts->end_index)
- return false;
- pt_load_entry(pts);
- return true;
- }
- /**
- * pt_next_entry() - Advance pts to the next entry
- * @pts: Iteration State
- *
- * Update pts to go to the next index at this level. If pts is pointing at a
- * contiguous entry then the index may advance my more than one.
- */
- static inline void pt_next_entry(struct pt_state *pts)
- {
- if (pts->type == PT_ENTRY_OA &&
- !__builtin_constant_p(pt_entry_num_contig_lg2(pts) == 0))
- _pt_advance(pts, pt_entry_num_contig_lg2(pts));
- else
- pts->index++;
- pt_index_to_va(pts);
- }
- /**
- * for_each_pt_level_entry() - For loop wrapper over entries in the range
- * @pts: Iteration State
- *
- * This is the basic iteration primitive. It iterates over all the entries in
- * pts->range that fall within the pts's current table level. Each step does
- * pt_load_entry(pts).
- */
- #define for_each_pt_level_entry(pts) \
- for (_pt_iter_first(pts); _pt_iter_load(pts); pt_next_entry(pts))
- /**
- * pt_load_single_entry() - Version of pt_load_entry() usable within a walker
- * @pts: Iteration State
- *
- * Alternative to for_each_pt_level_entry() if the walker function uses only a
- * single entry.
- */
- static inline enum pt_entry_type pt_load_single_entry(struct pt_state *pts)
- {
- pts->index = pt_range_to_index(pts);
- pt_load_entry(pts);
- return pts->type;
- }
- static __always_inline struct pt_range _pt_top_range(struct pt_common *common,
- uintptr_t top_of_table)
- {
- struct pt_range range = {
- .common = common,
- .top_table =
- (struct pt_table_p *)(top_of_table &
- ~(uintptr_t)PT_TOP_LEVEL_MASK),
- .top_level = top_of_table % (1 << PT_TOP_LEVEL_BITS),
- };
- struct pt_state pts = { .range = &range, .level = range.top_level };
- unsigned int max_vasz_lg2;
- max_vasz_lg2 = common->max_vasz_lg2;
- if (pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
- pts.level != PT_MAX_TOP_LEVEL)
- max_vasz_lg2 = min_t(unsigned int, common->max_vasz_lg2,
- pt_num_items_lg2(&pts) +
- pt_table_item_lg2sz(&pts));
- /*
- * The top range will default to the lower region only with sign extend.
- */
- range.max_vasz_lg2 = max_vasz_lg2;
- if (pt_feature(common, PT_FEAT_SIGN_EXTEND))
- max_vasz_lg2--;
- range.va = fvalog2_set_mod(pt_full_va_prefix(common), 0, max_vasz_lg2);
- range.last_va =
- fvalog2_set_mod_max(pt_full_va_prefix(common), max_vasz_lg2);
- return range;
- }
- /**
- * pt_top_range() - Return a range that spans part of the top level
- * @common: Table
- *
- * For PT_FEAT_SIGN_EXTEND this will return the lower range, and cover half the
- * total page table. Otherwise it returns the entire page table.
- */
- static __always_inline struct pt_range pt_top_range(struct pt_common *common)
- {
- /*
- * The top pointer can change without locking. We capture the value and
- * it's level here and are safe to walk it so long as both values are
- * captured without tearing.
- */
- return _pt_top_range(common, READ_ONCE(common->top_of_table));
- }
- /**
- * pt_all_range() - Return a range that spans the entire page table
- * @common: Table
- *
- * The returned range spans the whole page table. Due to how PT_FEAT_SIGN_EXTEND
- * is supported range->va and range->last_va will be incorrect during the
- * iteration and must not be accessed.
- */
- static inline struct pt_range pt_all_range(struct pt_common *common)
- {
- struct pt_range range = pt_top_range(common);
- if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
- return range;
- /*
- * Pretend the table is linear from 0 without a sign extension. This
- * generates the correct indexes for iteration.
- */
- range.last_va = fvalog2_set_mod_max(0, range.max_vasz_lg2);
- return range;
- }
- /**
- * pt_upper_range() - Return a range that spans part of the top level
- * @common: Table
- *
- * For PT_FEAT_SIGN_EXTEND this will return the upper range, and cover half the
- * total page table. Otherwise it returns the entire page table.
- */
- static inline struct pt_range pt_upper_range(struct pt_common *common)
- {
- struct pt_range range = pt_top_range(common);
- if (!pt_feature(common, PT_FEAT_SIGN_EXTEND))
- return range;
- range.va = fvalog2_set_mod(PT_VADDR_MAX, 0, range.max_vasz_lg2 - 1);
- range.last_va = PT_VADDR_MAX;
- return range;
- }
- /**
- * pt_make_range() - Return a range that spans part of the table
- * @common: Table
- * @va: Start address
- * @last_va: Last address
- *
- * The caller must validate the range with pt_check_range() before using it.
- */
- static __always_inline struct pt_range
- pt_make_range(struct pt_common *common, pt_vaddr_t va, pt_vaddr_t last_va)
- {
- struct pt_range range =
- _pt_top_range(common, READ_ONCE(common->top_of_table));
- range.va = va;
- range.last_va = last_va;
- return range;
- }
- /*
- * Span a slice of the table starting at a lower table level from an active
- * walk.
- */
- static __always_inline struct pt_range
- pt_make_child_range(const struct pt_range *parent, pt_vaddr_t va,
- pt_vaddr_t last_va)
- {
- struct pt_range range = *parent;
- range.va = va;
- range.last_va = last_va;
- PT_WARN_ON(last_va < va);
- PT_WARN_ON(pt_check_range(&range));
- return range;
- }
- /**
- * pt_init() - Initialize a pt_state on the stack
- * @range: Range pointer to embed in the state
- * @level: Table level for the state
- * @table: Pointer to the table memory at level
- *
- * Helper to initialize the on-stack pt_state from walker arguments.
- */
- static __always_inline struct pt_state
- pt_init(struct pt_range *range, unsigned int level, struct pt_table_p *table)
- {
- struct pt_state pts = {
- .range = range,
- .table = table,
- .level = level,
- };
- return pts;
- }
- /**
- * pt_init_top() - Initialize a pt_state on the stack
- * @range: Range pointer to embed in the state
- *
- * The pt_state points to the top most level.
- */
- static __always_inline struct pt_state pt_init_top(struct pt_range *range)
- {
- return pt_init(range, range->top_level, range->top_table);
- }
- typedef int (*pt_level_fn_t)(struct pt_range *range, void *arg,
- unsigned int level, struct pt_table_p *table);
- /**
- * pt_descend() - Recursively invoke the walker for the lower level
- * @pts: Iteration State
- * @arg: Value to pass to the function
- * @fn: Walker function to call
- *
- * pts must point to a table item. Invoke fn as a walker on the table
- * pts points to.
- */
- static __always_inline int pt_descend(struct pt_state *pts, void *arg,
- pt_level_fn_t fn)
- {
- int ret;
- if (PT_WARN_ON(!pts->table_lower))
- return -EINVAL;
- ret = (*fn)(pts->range, arg, pts->level - 1, pts->table_lower);
- return ret;
- }
- /**
- * pt_walk_range() - Walk over a VA range
- * @range: Range pointer
- * @fn: Walker function to call
- * @arg: Value to pass to the function
- *
- * Walk over a VA range. The caller should have done a validity check, at
- * least calling pt_check_range(), when building range. The walk will
- * start at the top most table.
- */
- static __always_inline int pt_walk_range(struct pt_range *range,
- pt_level_fn_t fn, void *arg)
- {
- return fn(range, arg, range->top_level, range->top_table);
- }
- /*
- * pt_walk_descend() - Recursively invoke the walker for a slice of a lower
- * level
- * @pts: Iteration State
- * @va: Start address
- * @last_va: Last address
- * @fn: Walker function to call
- * @arg: Value to pass to the function
- *
- * With pts pointing at a table item this will descend and over a slice of the
- * lower table. The caller must ensure that va/last_va are within the table
- * item. This creates a new walk and does not alter pts or pts->range.
- */
- static __always_inline int pt_walk_descend(const struct pt_state *pts,
- pt_vaddr_t va, pt_vaddr_t last_va,
- pt_level_fn_t fn, void *arg)
- {
- struct pt_range range = pt_make_child_range(pts->range, va, last_va);
- if (PT_WARN_ON(!pt_can_have_table(pts)) ||
- PT_WARN_ON(!pts->table_lower))
- return -EINVAL;
- return fn(&range, arg, pts->level - 1, pts->table_lower);
- }
- /*
- * pt_walk_descend_all() - Recursively invoke the walker for a table item
- * @parent_pts: Iteration State
- * @fn: Walker function to call
- * @arg: Value to pass to the function
- *
- * With pts pointing at a table item this will descend and over the entire lower
- * table. This creates a new walk and does not alter pts or pts->range.
- */
- static __always_inline int
- pt_walk_descend_all(const struct pt_state *parent_pts, pt_level_fn_t fn,
- void *arg)
- {
- unsigned int isz_lg2 = pt_table_item_lg2sz(parent_pts);
- return pt_walk_descend(parent_pts,
- log2_set_mod(parent_pts->range->va, 0, isz_lg2),
- log2_set_mod_max(parent_pts->range->va, isz_lg2),
- fn, arg);
- }
- /**
- * pt_range_slice() - Return a range that spans indexes
- * @pts: Iteration State
- * @start_index: Starting index within pts
- * @end_index: Ending index within pts
- *
- * Create a range than spans an index range of the current table level
- * pt_state points at.
- */
- static inline struct pt_range pt_range_slice(const struct pt_state *pts,
- unsigned int start_index,
- unsigned int end_index)
- {
- unsigned int table_lg2sz = pt_table_oa_lg2sz(pts);
- pt_vaddr_t last_va;
- pt_vaddr_t va;
- va = fvalog2_set_mod(pts->range->va,
- log2_mul(start_index, pt_table_item_lg2sz(pts)),
- table_lg2sz);
- last_va = fvalog2_set_mod(
- pts->range->va,
- log2_mul(end_index, pt_table_item_lg2sz(pts)) - 1, table_lg2sz);
- return pt_make_child_range(pts->range, va, last_va);
- }
- /**
- * pt_top_memsize_lg2()
- * @common: Table
- * @top_of_table: Top of table value from _pt_top_set()
- *
- * Compute the allocation size of the top table. For PT_FEAT_DYNAMIC_TOP this
- * will compute the top size assuming the table will grow.
- */
- static inline unsigned int pt_top_memsize_lg2(struct pt_common *common,
- uintptr_t top_of_table)
- {
- struct pt_range range = _pt_top_range(common, top_of_table);
- struct pt_state pts = pt_init_top(&range);
- unsigned int num_items_lg2;
- num_items_lg2 = common->max_vasz_lg2 - pt_table_item_lg2sz(&pts);
- if (range.top_level != PT_MAX_TOP_LEVEL &&
- pt_feature(common, PT_FEAT_DYNAMIC_TOP))
- num_items_lg2 = min(num_items_lg2, pt_num_items_lg2(&pts));
- /* Round up the allocation size to the minimum alignment */
- return max(ffs_t(u64, PT_TOP_PHYS_MASK),
- num_items_lg2 + ilog2(PT_ITEM_WORD_SIZE));
- }
- /**
- * pt_compute_best_pgsize() - Determine the best page size for leaf entries
- * @pgsz_bitmap: Permitted page sizes
- * @va: Starting virtual address for the leaf entry
- * @last_va: Last virtual address for the leaf entry, sets the max page size
- * @oa: Starting output address for the leaf entry
- *
- * Compute the largest page size for va, last_va, and oa together and return it
- * in lg2. The largest page size depends on the format's supported page sizes at
- * this level, and the relative alignment of the VA and OA addresses. 0 means
- * the OA cannot be stored with the provided pgsz_bitmap.
- */
- static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
- pt_vaddr_t va,
- pt_vaddr_t last_va,
- pt_oaddr_t oa)
- {
- unsigned int best_pgsz_lg2;
- unsigned int pgsz_lg2;
- pt_vaddr_t len = last_va - va + 1;
- pt_vaddr_t mask;
- if (PT_WARN_ON(va >= last_va))
- return 0;
- /*
- * Given a VA/OA pair the best page size is the largest page size
- * where:
- *
- * 1) VA and OA start at the page. Bitwise this is the count of least
- * significant 0 bits.
- * This also implies that last_va/oa has the same prefix as va/oa.
- */
- mask = va | oa;
- /*
- * 2) The page size is not larger than the last_va (length). Since page
- * sizes are always power of two this can't be larger than the
- * largest power of two factor of the length.
- */
- mask |= log2_to_int(vafls(len) - 1);
- best_pgsz_lg2 = vaffs(mask);
- /* Choose the highest bit <= best_pgsz_lg2 */
- if (best_pgsz_lg2 < PT_VADDR_MAX_LG2 - 1)
- pgsz_bitmap = log2_mod(pgsz_bitmap, best_pgsz_lg2 + 1);
- pgsz_lg2 = vafls(pgsz_bitmap);
- if (!pgsz_lg2)
- return 0;
- pgsz_lg2--;
- PT_WARN_ON(log2_mod(va, pgsz_lg2) != 0);
- PT_WARN_ON(oalog2_mod(oa, pgsz_lg2) != 0);
- PT_WARN_ON(va + log2_to_int(pgsz_lg2) - 1 > last_va);
- PT_WARN_ON(!log2_div_eq(va, va + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
- PT_WARN_ON(
- !oalog2_div_eq(oa, oa + log2_to_int(pgsz_lg2) - 1, pgsz_lg2));
- return pgsz_lg2;
- }
- #define _PT_MAKE_CALL_LEVEL(fn) \
- static __always_inline int fn(struct pt_range *range, void *arg, \
- unsigned int level, \
- struct pt_table_p *table) \
- { \
- static_assert(PT_MAX_TOP_LEVEL <= 5); \
- if (level == 0) \
- return CONCATENATE(fn, 0)(range, arg, 0, table); \
- if (level == 1 || PT_MAX_TOP_LEVEL == 1) \
- return CONCATENATE(fn, 1)(range, arg, 1, table); \
- if (level == 2 || PT_MAX_TOP_LEVEL == 2) \
- return CONCATENATE(fn, 2)(range, arg, 2, table); \
- if (level == 3 || PT_MAX_TOP_LEVEL == 3) \
- return CONCATENATE(fn, 3)(range, arg, 3, table); \
- if (level == 4 || PT_MAX_TOP_LEVEL == 4) \
- return CONCATENATE(fn, 4)(range, arg, 4, table); \
- return CONCATENATE(fn, 5)(range, arg, 5, table); \
- }
- static inline int __pt_make_level_fn_err(struct pt_range *range, void *arg,
- unsigned int unused_level,
- struct pt_table_p *table)
- {
- static_assert(PT_MAX_TOP_LEVEL <= 5);
- return -EPROTOTYPE;
- }
- #define __PT_MAKE_LEVEL_FN(fn, level, descend_fn, do_fn) \
- static inline int fn(struct pt_range *range, void *arg, \
- unsigned int unused_level, \
- struct pt_table_p *table) \
- { \
- return do_fn(range, arg, level, table, descend_fn); \
- }
- /**
- * PT_MAKE_LEVELS() - Build an unwound walker
- * @fn: Name of the walker function
- * @do_fn: Function to call at each level
- *
- * This builds a function call tree that can be fully inlined.
- * The caller must provide a function body in an __always_inline function::
- *
- * static __always_inline int do_fn(struct pt_range *range, void *arg,
- * unsigned int level, struct pt_table_p *table,
- * pt_level_fn_t descend_fn)
- *
- * An inline function will be created for each table level that calls do_fn with
- * a compile time constant for level and a pointer to the next lower function.
- * This generates an optimally inlined walk where each of the functions sees a
- * constant level and can codegen the exact constants/etc for that level.
- *
- * Note this can produce a lot of code!
- */
- #define PT_MAKE_LEVELS(fn, do_fn) \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 0), 0, __pt_make_level_fn_err, \
- do_fn); \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 1), 1, CONCATENATE(fn, 0), do_fn); \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 2), 2, CONCATENATE(fn, 1), do_fn); \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 3), 3, CONCATENATE(fn, 2), do_fn); \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 4), 4, CONCATENATE(fn, 3), do_fn); \
- __PT_MAKE_LEVEL_FN(CONCATENATE(fn, 5), 5, CONCATENATE(fn, 4), do_fn); \
- _PT_MAKE_CALL_LEVEL(fn)
- #endif
|