test_vmalloc.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
 * Test module to stress and analyze the performance of the vmalloc allocator.
  4. * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
  5. */
  6. #include <linux/init.h>
  7. #include <linux/kernel.h>
  8. #include <linux/module.h>
  9. #include <linux/vmalloc.h>
  10. #include <linux/random.h>
  11. #include <linux/kthread.h>
  12. #include <linux/moduleparam.h>
  13. #include <linux/completion.h>
  14. #include <linux/delay.h>
  15. #include <linux/mm.h>
  16. #include <linux/rcupdate.h>
  17. #include <linux/srcu.h>
  18. #include <linux/slab.h>
  19. #define __param(type, name, init, msg) \
  20. static type name = init; \
  21. module_param(name, type, 0444); \
  22. MODULE_PARM_DESC(name, msg) \
/*
 * Number of worker kthreads. init_test_configuration() clamps the value
 * to the range [1, USHRT_MAX], so the default of 0 means one worker.
 */
__param(int, nr_threads, 0,
	"Number of workers to perform tests(min: 1 max: USHRT_MAX)");

/* When false, each worker shuffles the order in which it runs the tests. */
__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

/*
 * How many times every selected test function is called per worker.
 * Values <= 0 are replaced by 1 in init_test_configuration().
 */
__param(int, test_repeat_count, 1,
	"Set test repeat counter");

/*
 * Iteration count used inside the individual test functions.
 * Values <= 0 are replaced by 1 in init_test_configuration().
 */
__param(int, test_loop_count, 1000000,
	"Set test loop counter");

/*
 * Allocation size in pages for fix_size_alloc_test(), no_block_alloc_test()
 * and vm_map_ram_test(); those tests treat any value <= 0 as one page.
 */
__param(int, nr_pages, 0,
	"Set number of pages for fix_size_alloc_test(default: 1)");

/* Makes fix_size_alloc_test() call vmalloc_huge() instead of vmalloc(). */
__param(bool, use_huge, false,
	"Use vmalloc_huge in fix_size_alloc_test");

/*
 * Bit mask of tests to run: bit N selects entry N of test_case_array[].
 * The default of 7 selects the first three entries.
 */
__param(int, run_test_mask, 7,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1, name: fix_size_alloc_test\n"
		"\t\tid: 2, name: full_fit_alloc_test\n"
		"\t\tid: 4, name: long_busy_list_alloc_test\n"
		"\t\tid: 8, name: random_size_alloc_test\n"
		"\t\tid: 16, name: fix_align_alloc_test\n"
		"\t\tid: 32, name: random_size_align_alloc_test\n"
		"\t\tid: 64, name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		"\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n"
		"\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n"
		"\t\tid: 1024, name: vm_map_ram_test\n"
		"\t\tid: 2048, name: no_block_alloc_test\n"
		/* Add a new test case description here. */
);

/* Number of per-cpu objects that pcpu_alloc_test() allocates and frees. */
__param(int, nr_pcpu_objects, 35000,
	"Number of pcpu objects to allocate for pcpu_alloc_test");
/*
 * This is for synchronization of the setup phase: do_concurrent_test()
 * holds the read side while it creates the workers, and every worker
 * calls synchronize_srcu() on it before it starts running tests.
 */
DEFINE_STATIC_SRCU(prepare_for_test_srcu);

/*
 * Completion tracking for worker threads. test_n_undone is incremented
 * for every worker that was started and decremented by
 * test_report_one_done(); the completion is signalled when the counter
 * drops to zero.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);
  62. static inline void
  63. test_report_one_done(void)
  64. {
  65. if (atomic_dec_and_test(&test_n_undone))
  66. complete(&test_all_done_comp);
  67. }
  68. static int random_size_align_alloc_test(void)
  69. {
  70. unsigned long size, align;
  71. unsigned int rnd;
  72. void *ptr;
  73. int i;
  74. for (i = 0; i < test_loop_count; i++) {
  75. rnd = get_random_u8();
  76. /*
  77. * Maximum 1024 pages, if PAGE_SIZE is 4096.
  78. */
  79. align = 1 << (rnd % 23);
  80. /*
  81. * Maximum 10 pages.
  82. */
  83. size = ((rnd % 10) + 1) * PAGE_SIZE;
  84. ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
  85. __builtin_return_address(0));
  86. if (!ptr)
  87. return -1;
  88. vfree(ptr);
  89. }
  90. return 0;
  91. }
  92. /*
  93. * This test case is supposed to be failed.
  94. */
  95. static int align_shift_alloc_test(void)
  96. {
  97. unsigned long align;
  98. void *ptr;
  99. int i;
  100. for (i = 0; i < BITS_PER_LONG; i++) {
  101. align = 1UL << i;
  102. ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
  103. __builtin_return_address(0));
  104. if (!ptr)
  105. return -1;
  106. vfree(ptr);
  107. }
  108. return 0;
  109. }
  110. static int fix_align_alloc_test(void)
  111. {
  112. void *ptr;
  113. int i;
  114. for (i = 0; i < test_loop_count; i++) {
  115. ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
  116. GFP_KERNEL | __GFP_ZERO, 0,
  117. __builtin_return_address(0));
  118. if (!ptr)
  119. return -1;
  120. vfree(ptr);
  121. }
  122. return 0;
  123. }
  124. static int random_size_alloc_test(void)
  125. {
  126. unsigned int n;
  127. void *p;
  128. int i;
  129. for (i = 0; i < test_loop_count; i++) {
  130. n = get_random_u32_inclusive(1, 100);
  131. p = vmalloc(n * PAGE_SIZE);
  132. if (!p)
  133. return -1;
  134. *((__u8 *)p) = 1;
  135. vfree(p);
  136. }
  137. return 0;
  138. }
  139. static int long_busy_list_alloc_test(void)
  140. {
  141. void *ptr_1, *ptr_2;
  142. void **ptr;
  143. int rv = -1;
  144. int i;
  145. ptr = vmalloc(sizeof(void *) * 15000);
  146. if (!ptr)
  147. return rv;
  148. for (i = 0; i < 15000; i++)
  149. ptr[i] = vmalloc(1 * PAGE_SIZE);
  150. for (i = 0; i < test_loop_count; i++) {
  151. ptr_1 = vmalloc(100 * PAGE_SIZE);
  152. if (!ptr_1)
  153. goto leave;
  154. ptr_2 = vmalloc(1 * PAGE_SIZE);
  155. if (!ptr_2) {
  156. vfree(ptr_1);
  157. goto leave;
  158. }
  159. *((__u8 *)ptr_1) = 0;
  160. *((__u8 *)ptr_2) = 1;
  161. vfree(ptr_1);
  162. vfree(ptr_2);
  163. }
  164. /* Success */
  165. rv = 0;
  166. leave:
  167. for (i = 0; i < 15000; i++)
  168. vfree(ptr[i]);
  169. vfree(ptr);
  170. return rv;
  171. }
  172. static int full_fit_alloc_test(void)
  173. {
  174. void **ptr, **junk_ptr, *tmp;
  175. int junk_length;
  176. int rv = -1;
  177. int i;
  178. junk_length = fls(num_online_cpus());
  179. junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);
  180. ptr = vmalloc(sizeof(void *) * junk_length);
  181. if (!ptr)
  182. return rv;
  183. junk_ptr = vmalloc(sizeof(void *) * junk_length);
  184. if (!junk_ptr) {
  185. vfree(ptr);
  186. return rv;
  187. }
  188. for (i = 0; i < junk_length; i++) {
  189. ptr[i] = vmalloc(1 * PAGE_SIZE);
  190. junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
  191. }
  192. for (i = 0; i < junk_length; i++)
  193. vfree(junk_ptr[i]);
  194. for (i = 0; i < test_loop_count; i++) {
  195. tmp = vmalloc(1 * PAGE_SIZE);
  196. if (!tmp)
  197. goto error;
  198. *((__u8 *)tmp) = 1;
  199. vfree(tmp);
  200. }
  201. /* Success */
  202. rv = 0;
  203. error:
  204. for (i = 0; i < junk_length; i++)
  205. vfree(ptr[i]);
  206. vfree(ptr);
  207. vfree(junk_ptr);
  208. return rv;
  209. }
  210. static int fix_size_alloc_test(void)
  211. {
  212. void *ptr;
  213. int i;
  214. for (i = 0; i < test_loop_count; i++) {
  215. if (use_huge)
  216. ptr = vmalloc_huge((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE, GFP_KERNEL);
  217. else
  218. ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE);
  219. if (!ptr)
  220. return -1;
  221. *((__u8 *)ptr) = 0;
  222. vfree(ptr);
  223. }
  224. return 0;
  225. }
  226. static int no_block_alloc_test(void)
  227. {
  228. void *ptr;
  229. int i;
  230. for (i = 0; i < test_loop_count; i++) {
  231. bool use_atomic = !!(get_random_u8() % 2);
  232. gfp_t gfp = use_atomic ? GFP_ATOMIC : GFP_NOWAIT;
  233. unsigned long size = (nr_pages > 0 ? nr_pages : 1) * PAGE_SIZE;
  234. preempt_disable();
  235. ptr = __vmalloc(size, gfp);
  236. preempt_enable();
  237. if (!ptr)
  238. return -1;
  239. *((__u8 *)ptr) = 0;
  240. vfree(ptr);
  241. }
  242. return 0;
  243. }
  244. static int
  245. pcpu_alloc_test(void)
  246. {
  247. int rv = 0;
  248. #ifndef CONFIG_NEED_PER_CPU_KM
  249. void __percpu **pcpu;
  250. size_t size, align;
  251. int i;
  252. pcpu = vmalloc(sizeof(void __percpu *) * nr_pcpu_objects);
  253. if (!pcpu)
  254. return -1;
  255. for (i = 0; i < nr_pcpu_objects; i++) {
  256. size = get_random_u32_inclusive(1, PAGE_SIZE / 4);
  257. /*
  258. * Maximum PAGE_SIZE
  259. */
  260. align = 1 << get_random_u32_inclusive(1, PAGE_SHIFT - 1);
  261. pcpu[i] = __alloc_percpu(size, align);
  262. if (!pcpu[i])
  263. rv = -1;
  264. }
  265. for (i = 0; i < nr_pcpu_objects; i++)
  266. free_percpu(pcpu[i]);
  267. vfree(pcpu);
  268. #endif
  269. return rv;
  270. }
/*
 * Object layout used by the kvfree_rcu_*_vmalloc_test() cases.
 */
struct test_kvfree_rcu {
	/* Head named in the two-argument kvfree_rcu(p, rcu) call. */
	struct rcu_head rcu;
	/* Payload; the tests write array[0] before freeing the object. */
	unsigned char array[20];
};
  275. static int
  276. kvfree_rcu_1_arg_vmalloc_test(void)
  277. {
  278. struct test_kvfree_rcu *p;
  279. int i;
  280. for (i = 0; i < test_loop_count; i++) {
  281. p = vmalloc(1 * PAGE_SIZE);
  282. if (!p)
  283. return -1;
  284. p->array[0] = 'a';
  285. kvfree_rcu_mightsleep(p);
  286. }
  287. return 0;
  288. }
  289. static int
  290. kvfree_rcu_2_arg_vmalloc_test(void)
  291. {
  292. struct test_kvfree_rcu *p;
  293. int i;
  294. for (i = 0; i < test_loop_count; i++) {
  295. p = vmalloc(1 * PAGE_SIZE);
  296. if (!p)
  297. return -1;
  298. p->array[0] = 'a';
  299. kvfree_rcu(p, rcu);
  300. }
  301. return 0;
  302. }
  303. static int
  304. vm_map_ram_test(void)
  305. {
  306. unsigned long nr_allocated;
  307. unsigned int map_nr_pages;
  308. unsigned char *v_ptr;
  309. struct page **pages;
  310. int i;
  311. map_nr_pages = nr_pages > 0 ? nr_pages:1;
  312. pages = kzalloc_objs(struct page *, map_nr_pages);
  313. if (!pages)
  314. return -1;
  315. nr_allocated = alloc_pages_bulk(GFP_KERNEL, map_nr_pages, pages);
  316. if (nr_allocated != map_nr_pages)
  317. goto cleanup;
  318. /* Run the test loop. */
  319. for (i = 0; i < test_loop_count; i++) {
  320. v_ptr = vm_map_ram(pages, map_nr_pages, NUMA_NO_NODE);
  321. *v_ptr = 'a';
  322. vm_unmap_ram(v_ptr, map_nr_pages);
  323. }
  324. cleanup:
  325. for (i = 0; i < nr_allocated; i++)
  326. __free_page(pages[i]);
  327. kfree(pages);
  328. /* 0 indicates success. */
  329. return nr_allocated != map_nr_pages;
  330. }
/*
 * Static description of one test case.
 */
struct test_case_desc {
	/* Name printed in the per-worker summary. */
	const char *test_name;
	/* Test body; a return value of 0 is counted as a pass. */
	int (*test_func)(void);
	/* When true, a non-zero return is counted as xfailed, not failed. */
	bool xfail;
};
  336. static struct test_case_desc test_case_array[] = {
  337. { "fix_size_alloc_test", fix_size_alloc_test, },
  338. { "full_fit_alloc_test", full_fit_alloc_test, },
  339. { "long_busy_list_alloc_test", long_busy_list_alloc_test, },
  340. { "random_size_alloc_test", random_size_alloc_test, },
  341. { "fix_align_alloc_test", fix_align_alloc_test, },
  342. { "random_size_align_alloc_test", random_size_align_alloc_test, },
  343. { "align_shift_alloc_test", align_shift_alloc_test, true },
  344. { "pcpu_alloc_test", pcpu_alloc_test, },
  345. { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, },
  346. { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, },
  347. { "vm_map_ram_test", vm_map_ram_test, },
  348. { "no_block_alloc_test", no_block_alloc_test, true },
  349. /* Add a new test case here. */
  350. };
/*
 * Per-worker results of one test case, filled in by test_func().
 */
struct test_case_data {
	/* Runs that returned non-zero for a test not marked xfail. */
	int test_failed;
	/* Runs that returned non-zero for a test marked xfail. */
	int test_xfailed;
	/* Runs that returned 0. */
	int test_passed;
	/* Elapsed microseconds divided by test_repeat_count. */
	u64 time;
};
/*
 * State of one worker. tdriver points to an array of nr_threads entries
 * that is allocated in init_test_configuration() and freed at the end of
 * do_concurrent_test().
 */
static struct test_driver {
	/* Result of kthread_run(); may be an ERR_PTR value. */
	struct task_struct *task;
	/* Results, indexed like test_case_array[]. */
	struct test_case_data data[ARRAY_SIZE(test_case_array)];
	/* get_cycles() values taken before and after the test loop. */
	unsigned long start;
	unsigned long stop;
} *tdriver;
  363. static void shuffle_array(int *arr, int n)
  364. {
  365. int i, j;
  366. for (i = n - 1; i > 0; i--) {
  367. /* Cut the range. */
  368. j = get_random_u32_below(i);
  369. /* Swap indexes. */
  370. swap(arr[i], arr[j]);
  371. }
  372. }
  373. static int test_func(void *private)
  374. {
  375. struct test_driver *t = private;
  376. int random_array[ARRAY_SIZE(test_case_array)];
  377. int index, i, j, ret;
  378. ktime_t kt;
  379. u64 delta;
  380. for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
  381. random_array[i] = i;
  382. if (!sequential_test_order)
  383. shuffle_array(random_array, ARRAY_SIZE(test_case_array));
  384. /*
  385. * Block until initialization is done.
  386. */
  387. synchronize_srcu(&prepare_for_test_srcu);
  388. t->start = get_cycles();
  389. for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
  390. index = random_array[i];
  391. /*
  392. * Skip tests if run_test_mask has been specified.
  393. */
  394. if (!((run_test_mask & (1 << index)) >> index))
  395. continue;
  396. kt = ktime_get();
  397. for (j = 0; j < test_repeat_count; j++) {
  398. ret = test_case_array[index].test_func();
  399. if (!ret)
  400. t->data[index].test_passed++;
  401. else if (ret && test_case_array[index].xfail)
  402. t->data[index].test_xfailed++;
  403. else
  404. t->data[index].test_failed++;
  405. }
  406. /*
  407. * Take an average time that test took.
  408. */
  409. delta = (u64) ktime_us_delta(ktime_get(), kt);
  410. do_div(delta, (u32) test_repeat_count);
  411. t->data[index].time = delta;
  412. }
  413. t->stop = get_cycles();
  414. test_report_one_done();
  415. /*
  416. * Wait for the kthread_stop() call.
  417. */
  418. while (!kthread_should_stop())
  419. msleep(10);
  420. return 0;
  421. }
  422. static int
  423. init_test_configuration(void)
  424. {
  425. /*
  426. * A maximum number of workers is defined as hard-coded
  427. * value and set to USHRT_MAX. We add such gap just in
  428. * case and for potential heavy stressing.
  429. */
  430. nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);
  431. /* Allocate the space for test instances. */
  432. tdriver = kvzalloc_objs(*tdriver, nr_threads);
  433. if (tdriver == NULL)
  434. return -1;
  435. if (test_repeat_count <= 0)
  436. test_repeat_count = 1;
  437. if (test_loop_count <= 0)
  438. test_loop_count = 1;
  439. return 0;
  440. }
  441. static void do_concurrent_test(void)
  442. {
  443. int i, ret, idx;
  444. /*
  445. * Set some basic configurations plus sanity check.
  446. */
  447. ret = init_test_configuration();
  448. if (ret < 0)
  449. return;
  450. /*
  451. * Put on hold all workers.
  452. */
  453. idx = srcu_read_lock(&prepare_for_test_srcu);
  454. for (i = 0; i < nr_threads; i++) {
  455. struct test_driver *t = &tdriver[i];
  456. t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);
  457. if (!IS_ERR(t->task))
  458. /* Success. */
  459. atomic_inc(&test_n_undone);
  460. else
  461. pr_err("Failed to start %d kthread\n", i);
  462. }
  463. /*
  464. * Now let the workers do their job.
  465. */
  466. srcu_read_unlock(&prepare_for_test_srcu, idx);
  467. /*
  468. * Sleep quiet until all workers are done with 1 second
  469. * interval. Since the test can take a lot of time we
  470. * can run into a stack trace of the hung task. That is
  471. * why we go with completion_timeout and HZ value.
  472. */
  473. do {
  474. ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
  475. } while (!ret);
  476. for (i = 0; i < nr_threads; i++) {
  477. struct test_driver *t = &tdriver[i];
  478. int j;
  479. if (!IS_ERR(t->task))
  480. kthread_stop(t->task);
  481. for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
  482. if (!((run_test_mask & (1 << j)) >> j))
  483. continue;
  484. pr_info(
  485. "Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n",
  486. test_case_array[j].test_name,
  487. t->data[j].test_passed,
  488. t->data[j].test_failed,
  489. t->data[j].test_xfailed,
  490. test_repeat_count, test_loop_count,
  491. t->data[j].time);
  492. }
  493. pr_info("All test took worker%d=%lu cycles\n",
  494. i, t->stop - t->start);
  495. }
  496. kvfree(tdriver);
  497. }
  498. static int __init vmalloc_test_init(void)
  499. {
  500. do_concurrent_test();
  501. /* Fail will directly unload the module */
  502. return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0:-EAGAIN;
  503. }
/*
 * Register vmalloc_test_init(): through module_init() when built as a
 * loadable module, through late_initcall() otherwise.
 */
#ifdef MODULE
module_init(vmalloc_test_init)
#else
late_initcall(vmalloc_test_init);
#endif

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");