stackdepot.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Stack depot - a stack trace storage that avoids duplication.
  4. *
  5. * Internally, stack depot maintains a hash table of unique stacktraces. The
  6. * stack traces themselves are stored contiguously one after another in a set
  7. * of separate page allocations.
  8. *
  9. * Author: Alexander Potapenko <glider@google.com>
  10. * Copyright (C) 2016 Google, Inc.
  11. *
  12. * Based on the code by Dmitry Chernenkov.
  13. */
  14. #define pr_fmt(fmt) "stackdepot: " fmt
  15. #include <linux/debugfs.h>
  16. #include <linux/gfp.h>
  17. #include <linux/jhash.h>
  18. #include <linux/kernel.h>
  19. #include <linux/kmsan.h>
  20. #include <linux/list.h>
  21. #include <linux/mm.h>
  22. #include <linux/mutex.h>
  23. #include <linux/poison.h>
  24. #include <linux/printk.h>
  25. #include <linux/rculist.h>
  26. #include <linux/rcupdate.h>
  27. #include <linux/refcount.h>
  28. #include <linux/slab.h>
  29. #include <linux/spinlock.h>
  30. #include <linux/stacktrace.h>
  31. #include <linux/stackdepot.h>
  32. #include <linux/string.h>
  33. #include <linux/types.h>
  34. #include <linux/memblock.h>
  35. #include <linux/kasan-enabled.h>
/*
 * Upper bound on the number of pools that may be stored in stack_pools.
 *
 * The pool_index is offset by 1 so the first record does not have a 0 handle.
 * The default is capped at 8192 and can be changed at boot through the
 * "stack_depot_max_pools" early parameter (see parse_max_pools()).
 */
static unsigned int stack_max_pools __read_mostly =
	MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192);

/* Set by the "stack_depot_disable" early parameter or when init fails. */
static bool stack_depot_disabled;
/* Whether stack_depot_early_init() should allocate the tables. */
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
/* Set once stack_depot_early_init() has been entered. */
static bool __stack_depot_early_init_passed __initdata;

/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c

/* Hash table of stored stack records. */
static struct list_head *stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;

/* The lock must be held when performing pool or freelist modifications. */
static DEFINE_RAW_SPINLOCK(pool_lock);
/* Array of memory regions that store stack records. */
static void **stack_pools __pt_guarded_by(&pool_lock);
/*
 * Newly allocated pool that is not yet added to stack_pools. Written with
 * WRITE_ONCE() under pool_lock and read locklessly with READ_ONCE() in
 * stack_depot_save_flags() to decide whether to preallocate.
 */
static void *new_pool;
/*
 * Number of pools in stack_pools. Updated with WRITE_ONCE() under pool_lock
 * and read locklessly in depot_fetch_stack().
 */
static int pools_num;
/*
 * Offset to the unused space in the currently used pool. Starts out at
 * DEPOT_POOL_SIZE so that the very first allocation in
 * depot_pop_free_pool() has to initialize a pool.
 */
static size_t pool_offset __guarded_by(&pool_lock) = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */
static __guarded_by(&pool_lock) LIST_HEAD(free_stacks);
/*
 * Statistics counters for debugfs.
 *
 * The enumerators index both counters[] and counter_names[];
 * DEPOT_COUNTER_COUNT is the number of counters and must stay last.
 */
enum depot_counter_id {
	DEPOT_COUNTER_REFD_ALLOCS,	/* records allocated with STACK_DEPOT_FLAG_GET */
	DEPOT_COUNTER_REFD_FREES,	/* records moved to the freelist */
	DEPOT_COUNTER_REFD_INUSE,	/* refcounted records currently live */
	DEPOT_COUNTER_FREELIST_SIZE,	/* records currently on free_stacks */
	DEPOT_COUNTER_PERSIST_COUNT,	/* records allocated without STACK_DEPOT_FLAG_GET */
	DEPOT_COUNTER_PERSIST_BYTES,	/* bytes used by those persistent records */
	DEPOT_COUNTER_COUNT,
};

/* Counter values; modified under pool_lock, read racily by stats_show(). */
static long counters[DEPOT_COUNTER_COUNT];

/* Names printed by stats_show(), one per counter. */
static const char *const counter_names[] = {
	[DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
	[DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
	[DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
	[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
	[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
	[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
};
/* Catch a counter being added without a matching name. */
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
  89. static int __init disable_stack_depot(char *str)
  90. {
  91. return kstrtobool(str, &stack_depot_disabled);
  92. }
  93. early_param("stack_depot_disable", disable_stack_depot);
  94. static int __init parse_max_pools(char *str)
  95. {
  96. const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1;
  97. unsigned int max_pools;
  98. int rv;
  99. rv = kstrtouint(str, 0, &max_pools);
  100. if (rv)
  101. return rv;
  102. if (max_pools < 1024) {
  103. pr_err("stack_depot_max_pools below 1024, using default of %u\n",
  104. stack_max_pools);
  105. goto out;
  106. }
  107. if (max_pools > limit) {
  108. pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n",
  109. limit, stack_max_pools);
  110. goto out;
  111. }
  112. stack_max_pools = max_pools;
  113. out:
  114. return 0;
  115. }
  116. early_param("stack_depot_max_pools", parse_max_pools);
/*
 * Asks stack_depot_early_init() to allocate the hash table and pool array.
 *
 * Only has an effect when called before stack_depot_early_init() runs; a
 * later call triggers a warning because the request can no longer be
 * honoured by that function.
 */
void __init stack_depot_request_early_init(void)
{
	/* Too late to request early init now. */
	WARN_ON(__stack_depot_early_init_passed);
	__stack_depot_early_init_requested = true;
}
  123. /* Initialize list_head's within the hash table. */
  124. static void init_stack_table(unsigned long entries)
  125. {
  126. unsigned long i;
  127. for (i = 0; i < entries; i++)
  128. INIT_LIST_HEAD(&stack_table[i]);
  129. }
  130. /* Allocates a hash table via memblock. Can only be used during early boot. */
  131. int __init stack_depot_early_init(void)
  132. {
  133. unsigned long entries = 0;
  134. /* This function must be called only once, from mm_init(). */
  135. if (WARN_ON(__stack_depot_early_init_passed))
  136. return 0;
  137. __stack_depot_early_init_passed = true;
  138. /*
  139. * Print disabled message even if early init has not been requested:
  140. * stack_depot_init() will not print one.
  141. */
  142. if (stack_depot_disabled) {
  143. pr_info("disabled\n");
  144. return 0;
  145. }
  146. /*
  147. * If KASAN is enabled, use the maximum order: KASAN is frequently used
  148. * in fuzzing scenarios, which leads to a large number of different
  149. * stack traces being stored in stack depot.
  150. */
  151. if (kasan_enabled() && !stack_bucket_number_order)
  152. stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;
  153. /*
  154. * Check if early init has been requested after setting
  155. * stack_bucket_number_order: stack_depot_init() uses its value.
  156. */
  157. if (!__stack_depot_early_init_requested)
  158. return 0;
  159. /*
  160. * If stack_bucket_number_order is not set, leave entries as 0 to rely
  161. * on the automatic calculations performed by alloc_large_system_hash().
  162. */
  163. if (stack_bucket_number_order)
  164. entries = 1UL << stack_bucket_number_order;
  165. pr_info("allocating hash table via alloc_large_system_hash\n");
  166. stack_table = alloc_large_system_hash("stackdepot",
  167. sizeof(struct list_head),
  168. entries,
  169. STACK_HASH_TABLE_SCALE,
  170. HASH_EARLY,
  171. NULL,
  172. &stack_hash_mask,
  173. 1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
  174. 1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
  175. if (!stack_table) {
  176. pr_err("hash table allocation failed, disabling\n");
  177. stack_depot_disabled = true;
  178. return -ENOMEM;
  179. }
  180. if (!entries) {
  181. /*
  182. * Obtain the number of entries that was calculated by
  183. * alloc_large_system_hash().
  184. */
  185. entries = stack_hash_mask + 1;
  186. }
  187. init_stack_table(entries);
  188. pr_info("allocating space for %u stack pools via memblock\n",
  189. stack_max_pools);
  190. stack_pools =
  191. memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE);
  192. if (!stack_pools) {
  193. pr_err("stack pools allocation failed, disabling\n");
  194. memblock_free(stack_table, entries * sizeof(struct list_head));
  195. stack_depot_disabled = true;
  196. return -ENOMEM;
  197. }
  198. return 0;
  199. }
  200. /* Allocates a hash table via kvcalloc. Can be used after boot. */
  201. int stack_depot_init(void)
  202. {
  203. static DEFINE_MUTEX(stack_depot_init_mutex);
  204. unsigned long entries;
  205. int ret = 0;
  206. mutex_lock(&stack_depot_init_mutex);
  207. if (stack_depot_disabled || stack_table)
  208. goto out_unlock;
  209. /*
  210. * Similarly to stack_depot_early_init, use stack_bucket_number_order
  211. * if assigned, and rely on automatic scaling otherwise.
  212. */
  213. if (stack_bucket_number_order) {
  214. entries = 1UL << stack_bucket_number_order;
  215. } else {
  216. int scale = STACK_HASH_TABLE_SCALE;
  217. entries = nr_free_buffer_pages();
  218. entries = roundup_pow_of_two(entries);
  219. if (scale > PAGE_SHIFT)
  220. entries >>= (scale - PAGE_SHIFT);
  221. else
  222. entries <<= (PAGE_SHIFT - scale);
  223. }
  224. if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
  225. entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
  226. if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
  227. entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;
  228. pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
  229. stack_table = kvzalloc_objs(struct list_head, entries);
  230. if (!stack_table) {
  231. pr_err("hash table allocation failed, disabling\n");
  232. stack_depot_disabled = true;
  233. ret = -ENOMEM;
  234. goto out_unlock;
  235. }
  236. stack_hash_mask = entries - 1;
  237. init_stack_table(entries);
  238. pr_info("allocating space for %u stack pools via kvcalloc\n",
  239. stack_max_pools);
  240. stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL);
  241. if (!stack_pools) {
  242. pr_err("stack pools allocation failed, disabling\n");
  243. kvfree(stack_table);
  244. stack_depot_disabled = true;
  245. ret = -ENOMEM;
  246. }
  247. out_unlock:
  248. mutex_unlock(&stack_depot_init_mutex);
  249. return ret;
  250. }
  251. EXPORT_SYMBOL_GPL(stack_depot_init);
  252. /*
  253. * Initializes new stack pool, and updates the list of pools.
  254. */
  255. static bool depot_init_pool(void **prealloc)
  256. __must_hold(&pool_lock)
  257. {
  258. lockdep_assert_held(&pool_lock);
  259. if (unlikely(pools_num >= stack_max_pools)) {
  260. /* Bail out if we reached the pool limit. */
  261. WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */
  262. WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
  263. WARN_ONCE(1, "Stack depot reached limit capacity");
  264. return false;
  265. }
  266. if (!new_pool && *prealloc) {
  267. /* We have preallocated memory, use it. */
  268. WRITE_ONCE(new_pool, *prealloc);
  269. *prealloc = NULL;
  270. }
  271. if (!new_pool)
  272. return false; /* new_pool and *prealloc are NULL */
  273. /* Save reference to the pool to be used by depot_fetch_stack(). */
  274. stack_pools[pools_num] = new_pool;
  275. /*
  276. * Stack depot tries to keep an extra pool allocated even before it runs
  277. * out of space in the currently used pool.
  278. *
  279. * To indicate that a new preallocation is needed new_pool is reset to
  280. * NULL; do not reset to NULL if we have reached the maximum number of
  281. * pools.
  282. */
  283. if (pools_num < stack_max_pools)
  284. WRITE_ONCE(new_pool, NULL);
  285. else
  286. WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
  287. /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
  288. WRITE_ONCE(pools_num, pools_num + 1);
  289. ASSERT_EXCLUSIVE_WRITER(pools_num);
  290. pool_offset = 0;
  291. return true;
  292. }
  293. /* Keeps the preallocated memory to be used for a new stack depot pool. */
  294. static void depot_keep_new_pool(void **prealloc)
  295. __must_hold(&pool_lock)
  296. {
  297. lockdep_assert_held(&pool_lock);
  298. /*
  299. * If a new pool is already saved or the maximum number of
  300. * pools is reached, do not use the preallocated memory.
  301. */
  302. if (new_pool)
  303. return;
  304. WRITE_ONCE(new_pool, *prealloc);
  305. *prealloc = NULL;
  306. }
  307. /*
  308. * Try to initialize a new stack record from the current pool, a cached pool, or
  309. * the current pre-allocation.
  310. */
  311. static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
  312. __must_hold(&pool_lock)
  313. {
  314. struct stack_record *stack;
  315. void *current_pool;
  316. u32 pool_index;
  317. lockdep_assert_held(&pool_lock);
  318. if (pool_offset + size > DEPOT_POOL_SIZE) {
  319. if (!depot_init_pool(prealloc))
  320. return NULL;
  321. }
  322. if (WARN_ON_ONCE(pools_num < 1))
  323. return NULL;
  324. pool_index = pools_num - 1;
  325. current_pool = stack_pools[pool_index];
  326. if (WARN_ON_ONCE(!current_pool))
  327. return NULL;
  328. stack = current_pool + pool_offset;
  329. /* Pre-initialize handle once. */
  330. stack->handle.pool_index_plus_1 = pool_index + 1;
  331. stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
  332. stack->handle.extra = 0;
  333. INIT_LIST_HEAD(&stack->hash_list);
  334. pool_offset += size;
  335. return stack;
  336. }
  337. /* Try to find next free usable entry from the freelist. */
  338. static struct stack_record *depot_pop_free(void)
  339. __must_hold(&pool_lock)
  340. {
  341. struct stack_record *stack;
  342. lockdep_assert_held(&pool_lock);
  343. if (list_empty(&free_stacks))
  344. return NULL;
  345. /*
  346. * We maintain the invariant that the elements in front are least
  347. * recently used, and are therefore more likely to be associated with an
  348. * RCU grace period in the past. Consequently it is sufficient to only
  349. * check the first entry.
  350. */
  351. stack = list_first_entry(&free_stacks, struct stack_record, free_list);
  352. if (!poll_state_synchronize_rcu(stack->rcu_state))
  353. return NULL;
  354. list_del(&stack->free_list);
  355. counters[DEPOT_COUNTER_FREELIST_SIZE]--;
  356. return stack;
  357. }
  358. static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
  359. {
  360. const size_t used = flex_array_size(s, entries, nr_entries);
  361. const size_t unused = sizeof(s->entries) - used;
  362. WARN_ON_ONCE(sizeof(s->entries) < used);
  363. return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
  364. }
  365. /* Allocates a new stack in a stack depot pool. */
  366. static struct stack_record *
  367. depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
  368. __must_hold(&pool_lock)
  369. {
  370. struct stack_record *stack = NULL;
  371. size_t record_size;
  372. lockdep_assert_held(&pool_lock);
  373. /* This should already be checked by public API entry points. */
  374. if (WARN_ON_ONCE(!nr_entries))
  375. return NULL;
  376. /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
  377. if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
  378. nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
  379. if (flags & STACK_DEPOT_FLAG_GET) {
  380. /*
  381. * Evictable entries have to allocate the max. size so they may
  382. * safely be re-used by differently sized allocations.
  383. */
  384. record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
  385. stack = depot_pop_free();
  386. } else {
  387. record_size = depot_stack_record_size(stack, nr_entries);
  388. }
  389. if (!stack) {
  390. stack = depot_pop_free_pool(prealloc, record_size);
  391. if (!stack)
  392. return NULL;
  393. }
  394. /* Save the stack trace. */
  395. stack->hash = hash;
  396. stack->size = nr_entries;
  397. /* stack->handle is already filled in by depot_pop_free_pool(). */
  398. memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
  399. if (flags & STACK_DEPOT_FLAG_GET) {
  400. refcount_set(&stack->count, 1);
  401. counters[DEPOT_COUNTER_REFD_ALLOCS]++;
  402. counters[DEPOT_COUNTER_REFD_INUSE]++;
  403. } else {
  404. /* Warn on attempts to switch to refcounting this entry. */
  405. refcount_set(&stack->count, REFCOUNT_SATURATED);
  406. counters[DEPOT_COUNTER_PERSIST_COUNT]++;
  407. counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
  408. }
  409. /*
  410. * Let KMSAN know the stored stack record is initialized. This shall
  411. * prevent false positive reports if instrumented code accesses it.
  412. */
  413. kmsan_unpoison_memory(stack, record_size);
  414. return stack;
  415. }
  416. static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
  417. __must_not_hold(&pool_lock)
  418. {
  419. const int pools_num_cached = READ_ONCE(pools_num);
  420. union handle_parts parts = { .handle = handle };
  421. void *pool;
  422. u32 pool_index = parts.pool_index_plus_1 - 1;
  423. size_t offset = parts.offset << DEPOT_STACK_ALIGN;
  424. struct stack_record *stack;
  425. lockdep_assert_not_held(&pool_lock);
  426. if (pool_index >= pools_num_cached) {
  427. WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
  428. pool_index, pools_num_cached, handle);
  429. return NULL;
  430. }
  431. /* @pool_index either valid, or user passed in corrupted value. */
  432. pool = context_unsafe(stack_pools[pool_index]);
  433. if (WARN_ON(!pool))
  434. return NULL;
  435. stack = pool + offset;
  436. if (WARN_ON(!refcount_read(&stack->count)))
  437. return NULL;
  438. return stack;
  439. }
  440. /* Links stack into the freelist. */
  441. static void depot_free_stack(struct stack_record *stack)
  442. __must_not_hold(&pool_lock)
  443. {
  444. unsigned long flags;
  445. lockdep_assert_not_held(&pool_lock);
  446. raw_spin_lock_irqsave(&pool_lock, flags);
  447. printk_deferred_enter();
  448. /*
  449. * Remove the entry from the hash list. Concurrent list traversal may
  450. * still observe the entry, but since the refcount is zero, this entry
  451. * will no longer be considered as valid.
  452. */
  453. list_del_rcu(&stack->hash_list);
  454. /*
  455. * Due to being used from constrained contexts such as the allocators,
  456. * NMI, or even RCU itself, stack depot cannot rely on primitives that
  457. * would sleep (such as synchronize_rcu()) or recursively call into
  458. * stack depot again (such as call_rcu()).
  459. *
  460. * Instead, get an RCU cookie, so that we can ensure this entry isn't
  461. * moved onto another list until the next grace period, and concurrent
  462. * RCU list traversal remains safe.
  463. */
  464. stack->rcu_state = get_state_synchronize_rcu();
  465. /*
  466. * Add the entry to the freelist tail, so that older entries are
  467. * considered first - their RCU cookie is more likely to no longer be
  468. * associated with the current grace period.
  469. */
  470. list_add_tail(&stack->free_list, &free_stacks);
  471. counters[DEPOT_COUNTER_FREELIST_SIZE]++;
  472. counters[DEPOT_COUNTER_REFD_FREES]++;
  473. counters[DEPOT_COUNTER_REFD_INUSE]--;
  474. printk_deferred_exit();
  475. raw_spin_unlock_irqrestore(&pool_lock, flags);
  476. }
  477. /* Calculates the hash for a stack. */
  478. static inline u32 hash_stack(unsigned long *entries, unsigned int size)
  479. {
  480. return jhash2((u32 *)entries,
  481. array_size(size, sizeof(*entries)) / sizeof(u32),
  482. STACK_HASH_SEED);
  483. }
  484. /*
  485. * Non-instrumented version of memcmp().
  486. * Does not check the lexicographical order, only the equality.
  487. */
  488. static inline
  489. int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
  490. unsigned int n)
  491. {
  492. for ( ; n-- ; u1++, u2++) {
  493. if (*u1 != *u2)
  494. return 1;
  495. }
  496. return 0;
  497. }
  498. /* Finds a stack in a bucket of the hash table. */
  499. static inline struct stack_record *find_stack(struct list_head *bucket,
  500. unsigned long *entries, int size,
  501. u32 hash, depot_flags_t flags)
  502. {
  503. struct stack_record *stack, *ret = NULL;
  504. /*
  505. * Stack depot may be used from instrumentation that instruments RCU or
  506. * tracing itself; use variant that does not call into RCU and cannot be
  507. * traced.
  508. *
  509. * Note: Such use cases must take care when using refcounting to evict
  510. * unused entries, because the stack record free-then-reuse code paths
  511. * do call into RCU.
  512. */
  513. rcu_read_lock_sched_notrace();
  514. list_for_each_entry_rcu(stack, bucket, hash_list) {
  515. if (stack->hash != hash || stack->size != size)
  516. continue;
  517. /*
  518. * This may race with depot_free_stack() accessing the freelist
  519. * management state unioned with @entries. The refcount is zero
  520. * in that case and the below refcount_inc_not_zero() will fail.
  521. */
  522. if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
  523. continue;
  524. /*
  525. * Try to increment refcount. If this succeeds, the stack record
  526. * is valid and has not yet been freed.
  527. *
  528. * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
  529. * to then call stack_depot_put() later, and we can assume that
  530. * a stack record is never placed back on the freelist.
  531. */
  532. if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
  533. continue;
  534. ret = stack;
  535. break;
  536. }
  537. rcu_read_unlock_sched_notrace();
  538. return ret;
  539. }
  540. depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
  541. unsigned int nr_entries,
  542. gfp_t alloc_flags,
  543. depot_flags_t depot_flags)
  544. {
  545. struct list_head *bucket;
  546. struct stack_record *found = NULL;
  547. depot_stack_handle_t handle = 0;
  548. struct page *page = NULL;
  549. void *prealloc = NULL;
  550. bool allow_spin = gfpflags_allow_spinning(alloc_flags);
  551. bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin;
  552. unsigned long flags;
  553. u32 hash;
  554. if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
  555. return 0;
  556. /*
  557. * If this stack trace is from an interrupt, including anything before
  558. * interrupt entry usually leads to unbounded stack depot growth.
  559. *
  560. * Since use of filter_irq_stacks() is a requirement to ensure stack
  561. * depot can efficiently deduplicate interrupt stacks, always
  562. * filter_irq_stacks() to simplify all callers' use of stack depot.
  563. */
  564. nr_entries = filter_irq_stacks(entries, nr_entries);
  565. if (unlikely(nr_entries == 0) || stack_depot_disabled)
  566. return 0;
  567. hash = hash_stack(entries, nr_entries);
  568. bucket = &stack_table[hash & stack_hash_mask];
  569. /* Fast path: look the stack trace up without locking. */
  570. found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
  571. if (found)
  572. goto exit;
  573. /*
  574. * Allocate memory for a new pool if required now:
  575. * we won't be able to do that under the lock.
  576. */
  577. if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
  578. page = alloc_pages(gfp_nested_mask(alloc_flags),
  579. DEPOT_POOL_ORDER);
  580. if (page)
  581. prealloc = page_address(page);
  582. }
  583. if (in_nmi() || !allow_spin) {
  584. /* We can never allocate in NMI context. */
  585. WARN_ON_ONCE(can_alloc);
  586. /* Best effort; bail if we fail to take the lock. */
  587. if (!raw_spin_trylock_irqsave(&pool_lock, flags))
  588. goto exit;
  589. } else {
  590. raw_spin_lock_irqsave(&pool_lock, flags);
  591. }
  592. printk_deferred_enter();
  593. /* Try to find again, to avoid concurrently inserting duplicates. */
  594. found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
  595. if (!found) {
  596. struct stack_record *new =
  597. depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
  598. if (new) {
  599. /*
  600. * This releases the stack record into the bucket and
  601. * makes it visible to readers in find_stack().
  602. */
  603. list_add_rcu(&new->hash_list, bucket);
  604. found = new;
  605. }
  606. }
  607. if (prealloc) {
  608. /*
  609. * Either stack depot already contains this stack trace, or
  610. * depot_alloc_stack() did not consume the preallocated memory.
  611. * Try to keep the preallocated memory for future.
  612. */
  613. depot_keep_new_pool(&prealloc);
  614. }
  615. printk_deferred_exit();
  616. raw_spin_unlock_irqrestore(&pool_lock, flags);
  617. exit:
  618. if (prealloc) {
  619. /* Stack depot didn't use this memory, free it. */
  620. if (!allow_spin)
  621. free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER);
  622. else
  623. free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
  624. }
  625. if (found)
  626. handle = found->handle.handle;
  627. return handle;
  628. }
  629. EXPORT_SYMBOL_GPL(stack_depot_save_flags);
  630. depot_stack_handle_t stack_depot_save(unsigned long *entries,
  631. unsigned int nr_entries,
  632. gfp_t alloc_flags)
  633. {
  634. return stack_depot_save_flags(entries, nr_entries, alloc_flags,
  635. STACK_DEPOT_FLAG_CAN_ALLOC);
  636. }
  637. EXPORT_SYMBOL_GPL(stack_depot_save);
  638. struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
  639. {
  640. if (!handle)
  641. return NULL;
  642. return depot_fetch_stack(handle);
  643. }
  644. unsigned int stack_depot_fetch(depot_stack_handle_t handle,
  645. unsigned long **entries)
  646. {
  647. struct stack_record *stack;
  648. *entries = NULL;
  649. /*
  650. * Let KMSAN know *entries is initialized. This shall prevent false
  651. * positive reports if instrumented code accesses it.
  652. */
  653. kmsan_unpoison_memory(entries, sizeof(*entries));
  654. if (!handle || stack_depot_disabled)
  655. return 0;
  656. stack = depot_fetch_stack(handle);
  657. /*
  658. * Should never be NULL, otherwise this is a use-after-put (or just a
  659. * corrupt handle).
  660. */
  661. if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
  662. return 0;
  663. *entries = stack->entries;
  664. return stack->size;
  665. }
  666. EXPORT_SYMBOL_GPL(stack_depot_fetch);
  667. void stack_depot_put(depot_stack_handle_t handle)
  668. {
  669. struct stack_record *stack;
  670. if (!handle || stack_depot_disabled)
  671. return;
  672. stack = depot_fetch_stack(handle);
  673. /*
  674. * Should always be able to find the stack record, otherwise this is an
  675. * unbalanced put attempt (or corrupt handle).
  676. */
  677. if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
  678. return;
  679. if (refcount_dec_and_test(&stack->count))
  680. depot_free_stack(stack);
  681. }
  682. EXPORT_SYMBOL_GPL(stack_depot_put);
  683. void stack_depot_print(depot_stack_handle_t stack)
  684. {
  685. unsigned long *entries;
  686. unsigned int nr_entries;
  687. nr_entries = stack_depot_fetch(stack, &entries);
  688. if (nr_entries > 0)
  689. stack_trace_print(entries, nr_entries, 0);
  690. }
  691. EXPORT_SYMBOL_GPL(stack_depot_print);
  692. int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
  693. int spaces)
  694. {
  695. unsigned long *entries;
  696. unsigned int nr_entries;
  697. nr_entries = stack_depot_fetch(handle, &entries);
  698. return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
  699. spaces) : 0;
  700. }
  701. EXPORT_SYMBOL_GPL(stack_depot_snprint);
  702. depot_stack_handle_t __must_check stack_depot_set_extra_bits(
  703. depot_stack_handle_t handle, unsigned int extra_bits)
  704. {
  705. union handle_parts parts = { .handle = handle };
  706. /* Don't set extra bits on empty handles. */
  707. if (!handle)
  708. return 0;
  709. parts.extra = extra_bits;
  710. return parts.handle;
  711. }
  712. EXPORT_SYMBOL(stack_depot_set_extra_bits);
  713. unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
  714. {
  715. union handle_parts parts = { .handle = handle };
  716. return parts.extra;
  717. }
  718. EXPORT_SYMBOL(stack_depot_get_extra_bits);
  719. static int stats_show(struct seq_file *seq, void *v)
  720. {
  721. /*
  722. * data race ok: These are just statistics counters, and approximate
  723. * statistics are ok for debugging.
  724. */
  725. seq_printf(seq, "pools: %d\n", data_race(pools_num));
  726. for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
  727. seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));
  728. return 0;
  729. }
  730. DEFINE_SHOW_ATTRIBUTE(stats);
  731. static int depot_debugfs_init(void)
  732. {
  733. struct dentry *dir;
  734. if (stack_depot_disabled)
  735. return 0;
  736. dir = debugfs_create_dir("stackdepot", NULL);
  737. debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
  738. return 0;
  739. }
  740. late_initcall(depot_debugfs_init);