test_objpool.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Test module for lockless object pool
  4. *
  5. * Copyright: wuqiang.matt@bytedance.com
  6. */
  7. #include <linux/errno.h>
  8. #include <linux/module.h>
  9. #include <linux/moduleparam.h>
  10. #include <linux/completion.h>
  11. #include <linux/kthread.h>
  12. #include <linux/slab.h>
  13. #include <linux/vmalloc.h>
  14. #include <linux/delay.h>
  15. #include <linux/hrtimer.h>
  16. #include <linux/objpool.h>
  /* capacity of the on-stack object array used by the bulk pop/push workers */
  #define OT_NR_MAX_BULK 16
  18. /* memory usage */
  19. struct ot_mem_stat {
  20. atomic_long_t alloc;
  21. atomic_long_t free;
  22. };
  /* outcome counters for objpool_pop() attempts */
  struct ot_obj_stat {
  	unsigned long nhits;	/* pops that returned an object */
  	unsigned long nmiss;	/* pops that returned NULL */
  };
  28. /* control & results per testcase */
  29. struct ot_data {
  30. struct rw_semaphore start;
  31. struct completion wait;
  32. struct completion rcu;
  33. atomic_t nthreads ____cacheline_aligned_in_smp;
  34. atomic_t stop ____cacheline_aligned_in_smp;
  35. struct ot_mem_stat kmalloc;
  36. struct ot_mem_stat vmalloc;
  37. struct ot_obj_stat objects;
  38. u64 duration;
  39. };
  40. /* testcase */
  41. struct ot_test {
  42. int async; /* synchronous or asynchronous */
  43. int mode; /* only mode 0 supported */
  44. int objsz; /* object size */
  45. int duration; /* ms */
  46. int delay; /* ms */
  47. int bulk_normal;
  48. int bulk_irq;
  49. unsigned long hrtimer; /* ms */
  50. const char *name;
  51. struct ot_data data;
  52. };
  53. /* per-cpu worker */
  54. struct ot_item {
  55. struct objpool_head *pool; /* pool head */
  56. struct ot_test *test; /* test parameters */
  57. void (*worker)(struct ot_item *item, int irq);
  58. /* hrtimer control */
  59. ktime_t hrtcycle;
  60. struct hrtimer hrtimer;
  61. int bulk[2]; /* for thread and irq */
  62. int delay;
  63. u32 niters;
  64. /* summary per thread */
  65. struct ot_obj_stat stat[2]; /* thread and irq */
  66. u64 duration;
  67. };
  68. /*
  69. * memory leakage checking
  70. */
  71. static void *ot_kzalloc(struct ot_test *test, long size)
  72. {
  73. void *ptr = kzalloc(size, GFP_KERNEL);
  74. if (ptr)
  75. atomic_long_add(size, &test->data.kmalloc.alloc);
  76. return ptr;
  77. }
  78. static void ot_kfree(struct ot_test *test, void *ptr, long size)
  79. {
  80. if (!ptr)
  81. return;
  82. atomic_long_add(size, &test->data.kmalloc.free);
  83. kfree(ptr);
  84. }
  85. static void ot_mem_report(struct ot_test *test)
  86. {
  87. long alloc, free;
  88. pr_info("memory allocation summary for %s\n", test->name);
  89. alloc = atomic_long_read(&test->data.kmalloc.alloc);
  90. free = atomic_long_read(&test->data.kmalloc.free);
  91. pr_info(" kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
  92. alloc = atomic_long_read(&test->data.vmalloc.alloc);
  93. free = atomic_long_read(&test->data.vmalloc.free);
  94. pr_info(" vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
  95. }
  /* the object type stored in the pool under test */
  struct ot_node {
  	void *owner;			/* set to the owning pool head by ot_init_node() */
  	unsigned long data;
  	unsigned long refs;		/* bumped each time the object is recycled */
  	unsigned long payload[32];	/* padding that gives the object its size */
  };
  103. /* user objpool manager */
  104. struct ot_context {
  105. struct objpool_head pool; /* objpool head */
  106. struct ot_test *test; /* test parameters */
  107. void *ptr; /* user pool buffer */
  108. unsigned long size; /* buffer size */
  109. struct rcu_head rcu;
  110. };
  111. static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
  112. static int ot_init_data(struct ot_data *data)
  113. {
  114. memset(data, 0, sizeof(*data));
  115. init_rwsem(&data->start);
  116. init_completion(&data->wait);
  117. init_completion(&data->rcu);
  118. atomic_set(&data->nthreads, 1);
  119. return 0;
  120. }
  121. static int ot_init_node(void *nod, void *context)
  122. {
  123. struct ot_context *sop = context;
  124. struct ot_node *on = nod;
  125. on->owner = &sop->pool;
  126. return 0;
  127. }
  128. static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
  129. {
  130. struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
  131. struct ot_test *test = item->test;
  132. if (atomic_read_acquire(&test->data.stop))
  133. return HRTIMER_NORESTART;
  134. /* do bulk-testings for objects pop/push */
  135. item->worker(item, 1);
  136. hrtimer_forward_now(hrt, item->hrtcycle);
  137. return HRTIMER_RESTART;
  138. }
  139. static void ot_start_hrtimer(struct ot_item *item)
  140. {
  141. if (!item->test->hrtimer)
  142. return;
  143. hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
  144. }
  145. static void ot_stop_hrtimer(struct ot_item *item)
  146. {
  147. if (!item->test->hrtimer)
  148. return;
  149. hrtimer_cancel(&item->hrtimer);
  150. }
  151. static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
  152. {
  153. struct hrtimer *hrt = &item->hrtimer;
  154. if (!hrtimer)
  155. return -ENOENT;
  156. item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
  157. hrtimer_setup(hrt, ot_hrtimer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  158. return 0;
  159. }
  160. static int ot_init_cpu_item(struct ot_item *item,
  161. struct ot_test *test,
  162. struct objpool_head *pool,
  163. void (*worker)(struct ot_item *, int))
  164. {
  165. memset(item, 0, sizeof(*item));
  166. item->pool = pool;
  167. item->test = test;
  168. item->worker = worker;
  169. item->bulk[0] = test->bulk_normal;
  170. item->bulk[1] = test->bulk_irq;
  171. item->delay = test->delay;
  172. /* initialize hrtimer */
  173. ot_init_hrtimer(item, item->test->hrtimer);
  174. return 0;
  175. }
  176. static int ot_thread_worker(void *arg)
  177. {
  178. struct ot_item *item = arg;
  179. struct ot_test *test = item->test;
  180. ktime_t start;
  181. atomic_inc(&test->data.nthreads);
  182. down_read(&test->data.start);
  183. up_read(&test->data.start);
  184. start = ktime_get();
  185. ot_start_hrtimer(item);
  186. do {
  187. if (atomic_read_acquire(&test->data.stop))
  188. break;
  189. /* do bulk-testings for objects pop/push */
  190. item->worker(item, 0);
  191. } while (!kthread_should_stop());
  192. ot_stop_hrtimer(item);
  193. item->duration = (u64) ktime_us_delta(ktime_get(), start);
  194. if (atomic_dec_and_test(&test->data.nthreads))
  195. complete(&test->data.wait);
  196. return 0;
  197. }
  198. static void ot_perf_report(struct ot_test *test, u64 duration)
  199. {
  200. struct ot_obj_stat total, normal = {0}, irq = {0};
  201. int cpu, nthreads = 0;
  202. pr_info("\n");
  203. pr_info("Testing summary for %s\n", test->name);
  204. for_each_possible_cpu(cpu) {
  205. struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
  206. if (!item->duration)
  207. continue;
  208. normal.nhits += item->stat[0].nhits;
  209. normal.nmiss += item->stat[0].nmiss;
  210. irq.nhits += item->stat[1].nhits;
  211. irq.nmiss += item->stat[1].nmiss;
  212. pr_info("CPU: %d duration: %lluus\n", cpu, item->duration);
  213. pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
  214. item->stat[0].nhits, item->stat[0].nmiss);
  215. pr_info("\tirq: \t%16lu hits \t%16lu miss\n",
  216. item->stat[1].nhits, item->stat[1].nmiss);
  217. pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
  218. item->stat[0].nhits + item->stat[1].nhits,
  219. item->stat[0].nmiss + item->stat[1].nmiss);
  220. nthreads++;
  221. }
  222. total.nhits = normal.nhits + irq.nhits;
  223. total.nmiss = normal.nmiss + irq.nmiss;
  224. pr_info("ALL: \tnthreads: %d duration: %lluus\n", nthreads, duration);
  225. pr_info("SUM: \t%16lu hits \t%16lu miss\n",
  226. total.nhits, total.nmiss);
  227. test->data.objects = total;
  228. test->data.duration = duration;
  229. }
  230. /*
  231. * synchronous test cases for objpool manipulation
  232. */
  233. /* objpool manipulation for synchronous mode (percpu objpool) */
  234. static struct ot_context *ot_init_sync_m0(struct ot_test *test)
  235. {
  236. struct ot_context *sop = NULL;
  237. int max = num_possible_cpus() << 3;
  238. gfp_t gfp = GFP_KERNEL;
  239. sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
  240. if (!sop)
  241. return NULL;
  242. sop->test = test;
  243. if (test->objsz < 512)
  244. gfp = GFP_ATOMIC;
  245. if (objpool_init(&sop->pool, max, test->objsz,
  246. gfp, sop, ot_init_node, NULL)) {
  247. ot_kfree(test, sop, sizeof(*sop));
  248. return NULL;
  249. }
  250. WARN_ON(max != sop->pool.nr_objs);
  251. return sop;
  252. }
  253. static void ot_fini_sync(struct ot_context *sop)
  254. {
  255. objpool_fini(&sop->pool);
  256. ot_kfree(sop->test, sop, sizeof(*sop));
  257. }
  258. static struct {
  259. struct ot_context * (*init)(struct ot_test *oc);
  260. void (*fini)(struct ot_context *sop);
  261. } g_ot_sync_ops[] = {
  262. {.init = ot_init_sync_m0, .fini = ot_fini_sync},
  263. };
  264. /*
  265. * synchronous test cases: performance mode
  266. */
  267. static void ot_bulk_sync(struct ot_item *item, int irq)
  268. {
  269. struct ot_node *nods[OT_NR_MAX_BULK];
  270. int i;
  271. for (i = 0; i < item->bulk[irq]; i++)
  272. nods[i] = objpool_pop(item->pool);
  273. if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
  274. msleep(item->delay);
  275. while (i-- > 0) {
  276. struct ot_node *on = nods[i];
  277. if (on) {
  278. on->refs++;
  279. objpool_push(on, item->pool);
  280. item->stat[irq].nhits++;
  281. } else {
  282. item->stat[irq].nmiss++;
  283. }
  284. }
  285. }
  286. static int ot_start_sync(struct ot_test *test)
  287. {
  288. struct ot_context *sop;
  289. ktime_t start;
  290. u64 duration;
  291. unsigned long timeout;
  292. int cpu;
  293. /* initialize objpool for syncrhonous testcase */
  294. sop = g_ot_sync_ops[test->mode].init(test);
  295. if (!sop)
  296. return -ENOMEM;
  297. /* grab rwsem to block testing threads */
  298. down_write(&test->data.start);
  299. for_each_possible_cpu(cpu) {
  300. struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
  301. struct task_struct *work;
  302. ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);
  303. /* skip offline cpus */
  304. if (!cpu_online(cpu))
  305. continue;
  306. work = kthread_run_on_cpu(ot_thread_worker, item,
  307. cpu, "ot_worker_%d");
  308. if (IS_ERR(work))
  309. pr_err("failed to create thread for cpu %d\n", cpu);
  310. }
  311. /* wait a while to make sure all threads waiting at start line */
  312. msleep(20);
  313. /* in case no threads were created: memory insufficient ? */
  314. if (atomic_dec_and_test(&test->data.nthreads))
  315. complete(&test->data.wait);
  316. // sched_set_fifo_low(current);
  317. /* start objpool testing threads */
  318. start = ktime_get();
  319. up_write(&test->data.start);
  320. /* yeild cpu to worker threads for duration ms */
  321. timeout = msecs_to_jiffies(test->duration);
  322. schedule_timeout_interruptible(timeout);
  323. /* tell workers threads to quit */
  324. atomic_set_release(&test->data.stop, 1);
  325. /* wait all workers threads finish and quit */
  326. wait_for_completion(&test->data.wait);
  327. duration = (u64) ktime_us_delta(ktime_get(), start);
  328. /* cleanup objpool */
  329. g_ot_sync_ops[test->mode].fini(sop);
  330. /* report testing summary and performance results */
  331. ot_perf_report(test, duration);
  332. /* report memory allocation summary */
  333. ot_mem_report(test);
  334. return 0;
  335. }
  336. /*
  337. * asynchronous test cases: pool lifecycle controlled by refcount
  338. */
  339. static void ot_fini_async_rcu(struct rcu_head *rcu)
  340. {
  341. struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
  342. struct ot_test *test = sop->test;
  343. /* here all cpus are aware of the stop event: test->data.stop = 1 */
  344. WARN_ON(!atomic_read_acquire(&test->data.stop));
  345. objpool_fini(&sop->pool);
  346. complete(&test->data.rcu);
  347. }
  348. static void ot_fini_async(struct ot_context *sop)
  349. {
  350. /* make sure the stop event is acknowledged by all cores */
  351. call_rcu(&sop->rcu, ot_fini_async_rcu);
  352. }
  353. static int ot_objpool_release(struct objpool_head *head, void *context)
  354. {
  355. struct ot_context *sop = context;
  356. WARN_ON(!head || !sop || head != &sop->pool);
  357. /* do context cleaning if needed */
  358. if (sop)
  359. ot_kfree(sop->test, sop, sizeof(*sop));
  360. return 0;
  361. }
  362. static struct ot_context *ot_init_async_m0(struct ot_test *test)
  363. {
  364. struct ot_context *sop = NULL;
  365. int max = num_possible_cpus() << 3;
  366. gfp_t gfp = GFP_KERNEL;
  367. sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
  368. if (!sop)
  369. return NULL;
  370. sop->test = test;
  371. if (test->objsz < 512)
  372. gfp = GFP_ATOMIC;
  373. if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
  374. ot_init_node, ot_objpool_release)) {
  375. ot_kfree(test, sop, sizeof(*sop));
  376. return NULL;
  377. }
  378. WARN_ON(max != sop->pool.nr_objs);
  379. return sop;
  380. }
  381. static struct {
  382. struct ot_context * (*init)(struct ot_test *oc);
  383. void (*fini)(struct ot_context *sop);
  384. } g_ot_async_ops[] = {
  385. {.init = ot_init_async_m0, .fini = ot_fini_async},
  386. };
  387. static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
  388. int release)
  389. {
  390. struct ot_context *sop;
  391. on->refs++;
  392. if (!release) {
  393. /* push object back to opjpool for reuse */
  394. objpool_push(on, pool);
  395. return;
  396. }
  397. sop = container_of(pool, struct ot_context, pool);
  398. WARN_ON(sop != pool->context);
  399. /* unref objpool with nod removed forever */
  400. objpool_drop(on, pool);
  401. }
  402. static void ot_bulk_async(struct ot_item *item, int irq)
  403. {
  404. struct ot_test *test = item->test;
  405. struct ot_node *nods[OT_NR_MAX_BULK];
  406. int i, stop;
  407. for (i = 0; i < item->bulk[irq]; i++)
  408. nods[i] = objpool_pop(item->pool);
  409. if (!irq) {
  410. if (item->delay || !(++(item->niters) & 0x7FFF))
  411. msleep(item->delay);
  412. get_cpu();
  413. }
  414. stop = atomic_read_acquire(&test->data.stop);
  415. /* drop all objects and deref objpool */
  416. while (i-- > 0) {
  417. struct ot_node *on = nods[i];
  418. if (on) {
  419. on->refs++;
  420. ot_nod_recycle(on, item->pool, stop);
  421. item->stat[irq].nhits++;
  422. } else {
  423. item->stat[irq].nmiss++;
  424. }
  425. }
  426. if (!irq)
  427. put_cpu();
  428. }
  429. static int ot_start_async(struct ot_test *test)
  430. {
  431. struct ot_context *sop;
  432. ktime_t start;
  433. u64 duration;
  434. unsigned long timeout;
  435. int cpu;
  436. /* initialize objpool for syncrhonous testcase */
  437. sop = g_ot_async_ops[test->mode].init(test);
  438. if (!sop)
  439. return -ENOMEM;
  440. /* grab rwsem to block testing threads */
  441. down_write(&test->data.start);
  442. for_each_possible_cpu(cpu) {
  443. struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
  444. struct task_struct *work;
  445. ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);
  446. /* skip offline cpus */
  447. if (!cpu_online(cpu))
  448. continue;
  449. work = kthread_run_on_cpu(ot_thread_worker, item, cpu, "ot_worker_%d");
  450. if (IS_ERR(work))
  451. pr_err("failed to create thread for cpu %d\n", cpu);
  452. }
  453. /* wait a while to make sure all threads waiting at start line */
  454. msleep(20);
  455. /* in case no threads were created: memory insufficient ? */
  456. if (atomic_dec_and_test(&test->data.nthreads))
  457. complete(&test->data.wait);
  458. /* start objpool testing threads */
  459. start = ktime_get();
  460. up_write(&test->data.start);
  461. /* yeild cpu to worker threads for duration ms */
  462. timeout = msecs_to_jiffies(test->duration);
  463. schedule_timeout_interruptible(timeout);
  464. /* tell workers threads to quit */
  465. atomic_set_release(&test->data.stop, 1);
  466. /* do async-finalization */
  467. g_ot_async_ops[test->mode].fini(sop);
  468. /* wait all workers threads finish and quit */
  469. wait_for_completion(&test->data.wait);
  470. duration = (u64) ktime_us_delta(ktime_get(), start);
  471. /* assure rcu callback is triggered */
  472. wait_for_completion(&test->data.rcu);
  473. /*
  474. * now we are sure that objpool is finalized either
  475. * by rcu callback or by worker threads
  476. */
  477. /* report testing summary and performance results */
  478. ot_perf_report(test, duration);
  479. /* report memory allocation summary */
  480. ot_mem_report(test);
  481. return 0;
  482. }
  483. /*
  484. * predefined testing cases:
  485. * synchronous case / overrun case / async case
  486. *
  487. * async: synchronous or asynchronous testing
  488. * mode: only mode 0 supported
  489. * objsz: object size
  490. * duration: int, total test time in ms
  491. * delay: int, delay (in ms) between each iteration
  492. * bulk_normal: int, repeat times for thread worker
  493. * bulk_irq: int, repeat times for irq consumer
  494. * hrtimer: unsigned long, hrtimer intervnal in ms
  495. * name: char *, tag for current test ot_item
  496. */
  497. #define NODE_COMPACT sizeof(struct ot_node)
  498. #define NODE_VMALLOC (512)
  499. static struct ot_test g_testcases[] = {
  500. /* sync & normal */
  501. {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"},
  502. {0, 0, NODE_VMALLOC, 1000, 0, 1, 0, 0, "sync: percpu objpool from vmalloc"},
  503. /* sync & hrtimer */
  504. {0, 0, NODE_COMPACT, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool"},
  505. {0, 0, NODE_VMALLOC, 1000, 0, 1, 1, 4, "sync & hrtimer: percpu objpool from vmalloc"},
  506. /* sync & overrun */
  507. {0, 0, NODE_COMPACT, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool"},
  508. {0, 0, NODE_VMALLOC, 1000, 0, 16, 0, 0, "sync overrun: percpu objpool from vmalloc"},
  509. /* async mode */
  510. {1, 0, NODE_COMPACT, 1000, 100, 1, 0, 0, "async: percpu objpool"},
  511. {1, 0, NODE_VMALLOC, 1000, 100, 1, 0, 0, "async: percpu objpool from vmalloc"},
  512. /* async + hrtimer mode */
  513. {1, 0, NODE_COMPACT, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool"},
  514. {1, 0, NODE_VMALLOC, 1000, 0, 4, 4, 4, "async & hrtimer: percpu objpool from vmalloc"},
  515. };
  516. static int __init ot_mod_init(void)
  517. {
  518. int i;
  519. /* perform testings */
  520. for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
  521. ot_init_data(&g_testcases[i].data);
  522. if (g_testcases[i].async)
  523. ot_start_async(&g_testcases[i]);
  524. else
  525. ot_start_sync(&g_testcases[i]);
  526. }
  527. /* show tests summary */
  528. pr_info("\n");
  529. pr_info("Summary of testcases:\n");
  530. for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
  531. pr_info(" duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
  532. g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
  533. g_testcases[i].data.objects.nmiss, g_testcases[i].name);
  534. }
  535. return -EAGAIN;
  536. }
  537. static void __exit ot_mod_exit(void)
  538. {
  539. }
  540. module_init(ot_mod_init);
  541. module_exit(ot_mod_exit);
  542. MODULE_DESCRIPTION("Test module for lockless object pool");
  543. MODULE_LICENSE("GPL");