param_test.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660
  1. // SPDX-License-Identifier: LGPL-2.1
  2. #define _GNU_SOURCE
  3. #include <assert.h>
  4. #include <linux/membarrier.h>
  5. #include <pthread.h>
  6. #include <sched.h>
  7. #include <stdatomic.h>
  8. #include <stdint.h>
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <syscall.h>
  13. #include <unistd.h>
  14. #include <poll.h>
  15. #include <sys/types.h>
  16. #include <signal.h>
  17. #include <errno.h>
  18. #include <stddef.h>
  19. #include <stdbool.h>
  20. static inline pid_t rseq_gettid(void)
  21. {
  22. return syscall(__NR_gettid);
  23. }
  24. #define NR_INJECT 9
  25. static int loop_cnt[NR_INJECT + 1];
  26. static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
  27. static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
  28. static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
  29. static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
  30. static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
  31. static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
  32. static int opt_modulo, verbose;
  33. static int opt_yield, opt_signal, opt_sleep,
  34. opt_disable_rseq, opt_threads = 200,
  35. opt_disable_mod = 0, opt_test = 's';
  36. static long long opt_reps = 5000;
  37. static __thread __attribute__((tls_model("initial-exec")))
  38. unsigned int signals_delivered;
  39. #ifndef BENCHMARK
  40. static __thread __attribute__((tls_model("initial-exec"), unused))
  41. unsigned int yield_mod_cnt, nr_abort;
  42. #define printf_verbose(fmt, ...) \
  43. do { \
  44. if (verbose) \
  45. printf(fmt, ## __VA_ARGS__); \
  46. } while (0)
  47. #ifdef __i386__
/* Scratch register used by the injected delay loop (i386). */
#define INJECT_ASM_REG	"eax"

/*
 * Extra clobber entry for the scratch register.
 * NOTE(review): the leading comma suggests this is appended to an
 * existing clobber list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load asm_loop_cnt_<n> into the
 * scratch register, skip to 333 if it is zero, otherwise decrement
 * until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
  59. #elif defined(__x86_64__)
/* 64-bit register holding the address of the loop counter (x86-64). */
#define INJECT_ASM_REG_P	"rax"
/* 32-bit view of the same register, holding the loop counter value. */
#define INJECT_ASM_REG	"eax"

/*
 * Extra clobber entries for the scratch register.
 * NOTE(review): the leading comma suggests this is appended to an
 * existing clobber list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: take the RIP-relative address of
 * asm_loop_cnt_<n>, load the counter through it, skip to 333 if it is
 * zero, otherwise decrement until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
  74. #elif defined(__s390__)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

/* Scratch register used by the injected delay loop (s390). */
#define INJECT_ASM_REG	"r12"

/* Extra clobber entry for the scratch register (same leading-comma convention). */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load loop_cnt[n], test it, skip to
 * 333 if it is zero, otherwise add -1 until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
  93. #elif defined(__ARMEL__)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

/* Scratch register used by the injected delay loop (little-endian ARM). */
#define INJECT_ASM_REG	"r4"

/* Extra clobber entry for the scratch register (same leading-comma convention). */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load loop_cnt[n], skip to 333 if it
 * is zero, otherwise subtract 1 (setting flags) until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
  112. #elif defined(__AARCH64EL__)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

/*
 * Scratch register for the delay loop (little-endian AArch64).
 * RSEQ_ASM_TMP_REG32 is not defined in this section; presumably it comes
 * from rseq.h. No RSEQ_INJECT_CLOBBER is defined here for this target.
 */
#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

/*
 * Delay loop for injection point n: load loop_cnt[n], skip to 333 if it
 * is zero, otherwise subtract 1 until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"
  128. #elif defined(__PPC__)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

/* Scratch register used by the injected delay loop (PowerPC). */
#define INJECT_ASM_REG	"r18"

/* Extra clobber entry for the scratch register (same leading-comma convention). */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load loop_cnt[n], skip to 333 if it
 * is zero, otherwise subtract 1 (recording the result) until it reaches
 * zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
  147. #elif defined(__mips__)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

/* Scratch register used by the injected delay loop (MIPS). */
#define INJECT_ASM_REG	"$5"

/* Extra clobber entry for the scratch register (same leading-comma convention). */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load loop_cnt[n], skip to 333 if it
 * is zero, otherwise add -1 until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
  165. #elif defined(__riscv)
/*
 * Memory operands naming loop_cnt[1..6] so the asm can refer to them as
 * %[loop_cnt_N].
 * NOTE(review): the leading comma suggests this is appended to an
 * existing input-operand list by the including header -- confirm in rseq.h.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

/* Scratch register used by the injected delay loop (RISC-V). */
#define INJECT_ASM_REG	"t1"

/* Extra clobber entry for the scratch register (same leading-comma convention). */
#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Delay loop for injection point n: load loop_cnt[n], skip to 333 if it
 * is zero, otherwise add -1 until it reaches zero.
 */
#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
  183. #elif defined(__or1k__)
  184. #define RSEQ_INJECT_INPUT \
  185. , [loop_cnt_1]"m"(loop_cnt[1]) \
  186. , [loop_cnt_2]"m"(loop_cnt[2]) \
  187. , [loop_cnt_3]"m"(loop_cnt[3]) \
  188. , [loop_cnt_4]"m"(loop_cnt[4]) \
  189. , [loop_cnt_5]"m"(loop_cnt[5]) \
  190. , [loop_cnt_6]"m"(loop_cnt[6])
  191. #define INJECT_ASM_REG "r31"
  192. #define RSEQ_INJECT_CLOBBER \
  193. , INJECT_ASM_REG
  194. #define RSEQ_INJECT_ASM(n) \
  195. "l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
  196. "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
  197. "l.bf 333f\n\t" \
  198. " l.nop\n\t" \
  199. "222:\n\t" \
  200. "l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
  201. "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
  202. "l.bf 222f\n\t" \
  203. " l.nop\n\t" \
  204. "333:\n\t"
  205. #else
  206. #error unsupported target
  207. #endif
  208. #define RSEQ_INJECT_FAILED \
  209. nr_abort++;
  210. #define RSEQ_INJECT_C(n) \
  211. { \
  212. int loc_i, loc_nr_loops = loop_cnt[n]; \
  213. \
  214. for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
  215. rseq_barrier(); \
  216. } \
  217. if (loc_nr_loops == -1 && opt_modulo) { \
  218. if (yield_mod_cnt == opt_modulo - 1) { \
  219. if (opt_sleep > 0) \
  220. poll(NULL, 0, opt_sleep); \
  221. if (opt_yield) \
  222. sched_yield(); \
  223. if (opt_signal) \
  224. raise(SIGUSR1); \
  225. yield_mod_cnt = 0; \
  226. } else { \
  227. yield_mod_cnt++; \
  228. } \
  229. } \
  230. }
  231. #else
/*
 * Benchmark builds: verbose output expands to nothing, so the format
 * string and its arguments are dropped at preprocessing time.
 */
#define printf_verbose(fmt, ...)
  233. #endif /* BENCHMARK */
  234. #include "rseq.h"
/* Memory ordering passed to rseq_cmpeqv_trystorev_storev() by this_cpu_buffer_push(). */
static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
  236. #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
  237. #define TEST_MEMBARRIER
  238. static int sys_membarrier(int cmd, int flags, int cpu_id)
  239. {
  240. return syscall(__NR_membarrier, cmd, flags, cpu_id);
  241. }
  242. #endif
  243. #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
  244. # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
  245. static
  246. int get_current_cpu_id(void)
  247. {
  248. return rseq_current_mm_cid();
  249. }
  250. static
  251. bool rseq_validate_cpu_id(void)
  252. {
  253. return rseq_mm_cid_available();
  254. }
  255. static
  256. bool rseq_use_cpu_index(void)
  257. {
  258. return false; /* Use mm_cid */
  259. }
  260. # ifdef TEST_MEMBARRIER
  261. /*
  262. * Membarrier does not currently support targeting a mm_cid, so
  263. * issue the barrier on all cpus.
  264. */
  265. static
  266. int rseq_membarrier_expedited(int cpu)
  267. {
  268. return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
  269. 0, 0);
  270. }
  271. # endif /* TEST_MEMBARRIER */
  272. #else
  273. # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
  274. static
  275. int get_current_cpu_id(void)
  276. {
  277. return rseq_cpu_start();
  278. }
  279. static
  280. bool rseq_validate_cpu_id(void)
  281. {
  282. return rseq_current_cpu_raw() >= 0;
  283. }
  284. static
  285. bool rseq_use_cpu_index(void)
  286. {
  287. return true; /* Use cpu_id as index. */
  288. }
  289. # ifdef TEST_MEMBARRIER
  290. static
  291. int rseq_membarrier_expedited(int cpu)
  292. {
  293. return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
  294. MEMBARRIER_CMD_FLAG_CPU, cpu);
  295. }
  296. # endif /* TEST_MEMBARRIER */
  297. #endif
/*
 * One lock word per cpu slot. rseq_this_cpu_lock() moves v from 0 to 1
 * and rseq_percpu_unlock() stores 0 back. Each entry is aligned to 128
 * bytes (presumably to keep slots on separate cache lines).
 */
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

/* Array of per-cpu lock words, one per possible index below CPU_SETSIZE. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};
/* One counter per cpu slot, aligned to 128 bytes. */
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

/* Shared state of the spinlock test: per-cpu locks and the counters they guard. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments for test_percpu_spinlock_thread(). */
struct spinlock_thread_test_data {
	struct spinlock_test_data *data;	/* shared test state */
	long long reps;				/* iterations to run */
	int reg;				/* non-zero: register with rseq (unless opt_disable_rseq) */
};
/* Shared state of the increment test: one counter per cpu slot. */
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments for test_percpu_inc_thread(). */
struct inc_thread_test_data {
	struct inc_test_data *data;	/* shared test state */
	long long reps;			/* iterations to run */
	int reg;			/* non-zero: register with rseq (unless opt_disable_rseq) */
};
/* Singly linked list node carrying one word of payload. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Head pointer of one cpu slot's list, aligned to 128 bytes. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* One list head per possible index below CPU_SETSIZE. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
/* Number of nodes test_percpu_buffer() initially places in each cpu slot. */
#define BUFFER_ITEM_PER_CPU 100

/* Item stored (by pointer) in a per-cpu buffer. */
struct percpu_buffer_node {
	intptr_t data;
};

/* One cpu slot's stack of node pointers, aligned to 128 bytes. */
struct percpu_buffer_entry {
	intptr_t offset;			/* number of items stored; next free index */
	intptr_t buflen;			/* capacity of array, in items */
	struct percpu_buffer_node **array;	/* storage for the node pointers */
} __attribute__((aligned(128)));

/* One buffer per possible index below CPU_SETSIZE. */
struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};
/* Per-cpu item count for the memcpy buffer test (its use is outside this section). */
#define MEMCPY_BUFFER_ITEM_PER_CPU 100

/* Item stored by value in a per-cpu memcpy buffer. */
struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

/*
 * One cpu slot's array of nodes, aligned to 128 bytes.
 * NOTE(review): offset/buflen presumably mirror percpu_buffer_entry
 * (items stored / capacity) -- confirm against the memcpy buffer code.
 */
struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

/* One memcpy buffer per possible index below CPU_SETSIZE. */
struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
  359. /* A simple percpu spinlock. Grabs lock on current cpu. */
  360. static int rseq_this_cpu_lock(struct percpu_lock *lock)
  361. {
  362. int cpu;
  363. for (;;) {
  364. int ret;
  365. cpu = get_current_cpu_id();
  366. if (cpu < 0) {
  367. fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
  368. getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
  369. abort();
  370. }
  371. ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  372. &lock->c[cpu].v,
  373. 0, 1, cpu);
  374. if (rseq_likely(!ret))
  375. break;
  376. /* Retry if comparison fails or rseq aborts. */
  377. }
  378. /*
  379. * Acquire semantic when taking lock after control dependency.
  380. * Matches rseq_smp_store_release().
  381. */
  382. rseq_smp_acquire__after_ctrl_dep();
  383. return cpu;
  384. }
  385. static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
  386. {
  387. assert(lock->c[cpu].v == 1);
  388. /*
  389. * Release lock, with release semantic. Matches
  390. * rseq_smp_acquire__after_ctrl_dep().
  391. */
  392. rseq_smp_store_release(&lock->c[cpu].v, 0);
  393. }
  394. void *test_percpu_spinlock_thread(void *arg)
  395. {
  396. struct spinlock_thread_test_data *thread_data = arg;
  397. struct spinlock_test_data *data = thread_data->data;
  398. long long i, reps;
  399. if (!opt_disable_rseq && thread_data->reg &&
  400. rseq_register_current_thread())
  401. abort();
  402. reps = thread_data->reps;
  403. for (i = 0; i < reps; i++) {
  404. int cpu = rseq_this_cpu_lock(&data->lock);
  405. data->c[cpu].count++;
  406. rseq_percpu_unlock(&data->lock, cpu);
  407. #ifndef BENCHMARK
  408. if (i != 0 && !(i % (reps / 10)))
  409. printf_verbose("tid %d: count %lld\n",
  410. (int) rseq_gettid(), i);
  411. #endif
  412. }
  413. printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
  414. (int) rseq_gettid(), nr_abort, signals_delivered);
  415. if (!opt_disable_rseq && thread_data->reg &&
  416. rseq_unregister_current_thread())
  417. abort();
  418. return NULL;
  419. }
  420. /*
  421. * A simple test which implements a sharded counter using a per-cpu
  422. * lock. Obviously real applications might prefer to simply use a
  423. * per-cpu increment; however, this is reasonable for a test and the
  424. * lock can be extended to synchronize more complicated operations.
  425. */
  426. void test_percpu_spinlock(void)
  427. {
  428. const int num_threads = opt_threads;
  429. int i, ret;
  430. uint64_t sum;
  431. pthread_t test_threads[num_threads];
  432. struct spinlock_test_data data;
  433. struct spinlock_thread_test_data thread_data[num_threads];
  434. memset(&data, 0, sizeof(data));
  435. for (i = 0; i < num_threads; i++) {
  436. thread_data[i].reps = opt_reps;
  437. if (opt_disable_mod <= 0 || (i % opt_disable_mod))
  438. thread_data[i].reg = 1;
  439. else
  440. thread_data[i].reg = 0;
  441. thread_data[i].data = &data;
  442. ret = pthread_create(&test_threads[i], NULL,
  443. test_percpu_spinlock_thread,
  444. &thread_data[i]);
  445. if (ret) {
  446. errno = ret;
  447. perror("pthread_create");
  448. abort();
  449. }
  450. }
  451. for (i = 0; i < num_threads; i++) {
  452. ret = pthread_join(test_threads[i], NULL);
  453. if (ret) {
  454. errno = ret;
  455. perror("pthread_join");
  456. abort();
  457. }
  458. }
  459. sum = 0;
  460. for (i = 0; i < CPU_SETSIZE; i++)
  461. sum += data.c[i].count;
  462. assert(sum == (uint64_t)opt_reps * num_threads);
  463. }
  464. void *test_percpu_inc_thread(void *arg)
  465. {
  466. struct inc_thread_test_data *thread_data = arg;
  467. struct inc_test_data *data = thread_data->data;
  468. long long i, reps;
  469. if (!opt_disable_rseq && thread_data->reg &&
  470. rseq_register_current_thread())
  471. abort();
  472. reps = thread_data->reps;
  473. for (i = 0; i < reps; i++) {
  474. int ret;
  475. do {
  476. int cpu;
  477. cpu = get_current_cpu_id();
  478. ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  479. &data->c[cpu].count, 1, cpu);
  480. } while (rseq_unlikely(ret));
  481. #ifndef BENCHMARK
  482. if (i != 0 && !(i % (reps / 10)))
  483. printf_verbose("tid %d: count %lld\n",
  484. (int) rseq_gettid(), i);
  485. #endif
  486. }
  487. printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
  488. (int) rseq_gettid(), nr_abort, signals_delivered);
  489. if (!opt_disable_rseq && thread_data->reg &&
  490. rseq_unregister_current_thread())
  491. abort();
  492. return NULL;
  493. }
  494. void test_percpu_inc(void)
  495. {
  496. const int num_threads = opt_threads;
  497. int i, ret;
  498. uint64_t sum;
  499. pthread_t test_threads[num_threads];
  500. struct inc_test_data data;
  501. struct inc_thread_test_data thread_data[num_threads];
  502. memset(&data, 0, sizeof(data));
  503. for (i = 0; i < num_threads; i++) {
  504. thread_data[i].reps = opt_reps;
  505. if (opt_disable_mod <= 0 || (i % opt_disable_mod))
  506. thread_data[i].reg = 1;
  507. else
  508. thread_data[i].reg = 0;
  509. thread_data[i].data = &data;
  510. ret = pthread_create(&test_threads[i], NULL,
  511. test_percpu_inc_thread,
  512. &thread_data[i]);
  513. if (ret) {
  514. errno = ret;
  515. perror("pthread_create");
  516. abort();
  517. }
  518. }
  519. for (i = 0; i < num_threads; i++) {
  520. ret = pthread_join(test_threads[i], NULL);
  521. if (ret) {
  522. errno = ret;
  523. perror("pthread_join");
  524. abort();
  525. }
  526. }
  527. sum = 0;
  528. for (i = 0; i < CPU_SETSIZE; i++)
  529. sum += data.c[i].count;
  530. assert(sum == (uint64_t)opt_reps * num_threads);
  531. }
  532. void this_cpu_list_push(struct percpu_list *list,
  533. struct percpu_list_node *node,
  534. int *_cpu)
  535. {
  536. int cpu;
  537. for (;;) {
  538. intptr_t *targetptr, newval, expect;
  539. int ret;
  540. cpu = get_current_cpu_id();
  541. /* Load list->c[cpu].head with single-copy atomicity. */
  542. expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
  543. newval = (intptr_t)node;
  544. targetptr = (intptr_t *)&list->c[cpu].head;
  545. node->next = (struct percpu_list_node *)expect;
  546. ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  547. targetptr, expect, newval, cpu);
  548. if (rseq_likely(!ret))
  549. break;
  550. /* Retry if comparison fails or rseq aborts. */
  551. }
  552. if (_cpu)
  553. *_cpu = cpu;
  554. }
  555. /*
  556. * Unlike a traditional lock-less linked list; the availability of a
  557. * rseq primitive allows us to implement pop without concerns over
  558. * ABA-type races.
  559. */
  560. struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
  561. int *_cpu)
  562. {
  563. struct percpu_list_node *node = NULL;
  564. int cpu;
  565. for (;;) {
  566. struct percpu_list_node *head;
  567. intptr_t *targetptr, expectnot, *load;
  568. long offset;
  569. int ret;
  570. cpu = get_current_cpu_id();
  571. targetptr = (intptr_t *)&list->c[cpu].head;
  572. expectnot = (intptr_t)NULL;
  573. offset = offsetof(struct percpu_list_node, next);
  574. load = (intptr_t *)&head;
  575. ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  576. targetptr, expectnot,
  577. offset, load, cpu);
  578. if (rseq_likely(!ret)) {
  579. node = head;
  580. break;
  581. }
  582. if (ret > 0)
  583. break;
  584. /* Retry if rseq aborts. */
  585. }
  586. if (_cpu)
  587. *_cpu = cpu;
  588. return node;
  589. }
  590. /*
  591. * __percpu_list_pop is not safe against concurrent accesses. Should
  592. * only be used on lists that are not concurrently modified.
  593. */
  594. struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
  595. {
  596. struct percpu_list_node *node;
  597. node = list->c[cpu].head;
  598. if (!node)
  599. return NULL;
  600. list->c[cpu].head = node->next;
  601. return node;
  602. }
  603. void *test_percpu_list_thread(void *arg)
  604. {
  605. long long i, reps;
  606. struct percpu_list *list = (struct percpu_list *)arg;
  607. if (!opt_disable_rseq && rseq_register_current_thread())
  608. abort();
  609. reps = opt_reps;
  610. for (i = 0; i < reps; i++) {
  611. struct percpu_list_node *node;
  612. node = this_cpu_list_pop(list, NULL);
  613. if (opt_yield)
  614. sched_yield(); /* encourage shuffling */
  615. if (node)
  616. this_cpu_list_push(list, node, NULL);
  617. }
  618. printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
  619. (int) rseq_gettid(), nr_abort, signals_delivered);
  620. if (!opt_disable_rseq && rseq_unregister_current_thread())
  621. abort();
  622. return NULL;
  623. }
  624. /* Simultaneous modification to a per-cpu linked list from many threads. */
  625. void test_percpu_list(void)
  626. {
  627. const int num_threads = opt_threads;
  628. int i, j, ret;
  629. uint64_t sum = 0, expected_sum = 0;
  630. struct percpu_list list;
  631. pthread_t test_threads[num_threads];
  632. cpu_set_t allowed_cpus;
  633. memset(&list, 0, sizeof(list));
  634. /* Generate list entries for every usable cpu. */
  635. sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
  636. for (i = 0; i < CPU_SETSIZE; i++) {
  637. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  638. continue;
  639. for (j = 1; j <= 100; j++) {
  640. struct percpu_list_node *node;
  641. expected_sum += j;
  642. node = malloc(sizeof(*node));
  643. assert(node);
  644. node->data = j;
  645. node->next = list.c[i].head;
  646. list.c[i].head = node;
  647. }
  648. }
  649. for (i = 0; i < num_threads; i++) {
  650. ret = pthread_create(&test_threads[i], NULL,
  651. test_percpu_list_thread, &list);
  652. if (ret) {
  653. errno = ret;
  654. perror("pthread_create");
  655. abort();
  656. }
  657. }
  658. for (i = 0; i < num_threads; i++) {
  659. ret = pthread_join(test_threads[i], NULL);
  660. if (ret) {
  661. errno = ret;
  662. perror("pthread_join");
  663. abort();
  664. }
  665. }
  666. for (i = 0; i < CPU_SETSIZE; i++) {
  667. struct percpu_list_node *node;
  668. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  669. continue;
  670. while ((node = __percpu_list_pop(&list, i))) {
  671. sum += node->data;
  672. free(node);
  673. }
  674. }
  675. /*
  676. * All entries should now be accounted for (unless some external
  677. * actor is interfering with our allowed affinity while this
  678. * test is running).
  679. */
  680. assert(sum == expected_sum);
  681. }
  682. bool this_cpu_buffer_push(struct percpu_buffer *buffer,
  683. struct percpu_buffer_node *node,
  684. int *_cpu)
  685. {
  686. bool result = false;
  687. int cpu;
  688. for (;;) {
  689. intptr_t *targetptr_spec, newval_spec;
  690. intptr_t *targetptr_final, newval_final;
  691. intptr_t offset;
  692. int ret;
  693. cpu = get_current_cpu_id();
  694. offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
  695. if (offset == buffer->c[cpu].buflen)
  696. break;
  697. newval_spec = (intptr_t)node;
  698. targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
  699. newval_final = offset + 1;
  700. targetptr_final = &buffer->c[cpu].offset;
  701. ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
  702. targetptr_final, offset, targetptr_spec,
  703. newval_spec, newval_final, cpu);
  704. if (rseq_likely(!ret)) {
  705. result = true;
  706. break;
  707. }
  708. /* Retry if comparison fails or rseq aborts. */
  709. }
  710. if (_cpu)
  711. *_cpu = cpu;
  712. return result;
  713. }
  714. struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
  715. int *_cpu)
  716. {
  717. struct percpu_buffer_node *head;
  718. int cpu;
  719. for (;;) {
  720. intptr_t *targetptr, newval;
  721. intptr_t offset;
  722. int ret;
  723. cpu = get_current_cpu_id();
  724. /* Load offset with single-copy atomicity. */
  725. offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
  726. if (offset == 0) {
  727. head = NULL;
  728. break;
  729. }
  730. head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
  731. newval = offset - 1;
  732. targetptr = (intptr_t *)&buffer->c[cpu].offset;
  733. ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  734. targetptr, offset,
  735. (intptr_t *)&buffer->c[cpu].array[offset - 1],
  736. (intptr_t)head, newval, cpu);
  737. if (rseq_likely(!ret))
  738. break;
  739. /* Retry if comparison fails or rseq aborts. */
  740. }
  741. if (_cpu)
  742. *_cpu = cpu;
  743. return head;
  744. }
  745. /*
  746. * __percpu_buffer_pop is not safe against concurrent accesses. Should
  747. * only be used on buffers that are not concurrently modified.
  748. */
  749. struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
  750. int cpu)
  751. {
  752. struct percpu_buffer_node *head;
  753. intptr_t offset;
  754. offset = buffer->c[cpu].offset;
  755. if (offset == 0)
  756. return NULL;
  757. head = buffer->c[cpu].array[offset - 1];
  758. buffer->c[cpu].offset = offset - 1;
  759. return head;
  760. }
  761. void *test_percpu_buffer_thread(void *arg)
  762. {
  763. long long i, reps;
  764. struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
  765. if (!opt_disable_rseq && rseq_register_current_thread())
  766. abort();
  767. reps = opt_reps;
  768. for (i = 0; i < reps; i++) {
  769. struct percpu_buffer_node *node;
  770. node = this_cpu_buffer_pop(buffer, NULL);
  771. if (opt_yield)
  772. sched_yield(); /* encourage shuffling */
  773. if (node) {
  774. if (!this_cpu_buffer_push(buffer, node, NULL)) {
  775. /* Should increase buffer size. */
  776. abort();
  777. }
  778. }
  779. }
  780. printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
  781. (int) rseq_gettid(), nr_abort, signals_delivered);
  782. if (!opt_disable_rseq && rseq_unregister_current_thread())
  783. abort();
  784. return NULL;
  785. }
  786. /* Simultaneous modification to a per-cpu buffer from many threads. */
  787. void test_percpu_buffer(void)
  788. {
  789. const int num_threads = opt_threads;
  790. int i, j, ret;
  791. uint64_t sum = 0, expected_sum = 0;
  792. struct percpu_buffer buffer;
  793. pthread_t test_threads[num_threads];
  794. cpu_set_t allowed_cpus;
  795. memset(&buffer, 0, sizeof(buffer));
  796. /* Generate list entries for every usable cpu. */
  797. sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
  798. for (i = 0; i < CPU_SETSIZE; i++) {
  799. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  800. continue;
  801. /* Worse-case is every item in same CPU. */
  802. buffer.c[i].array =
  803. malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
  804. BUFFER_ITEM_PER_CPU);
  805. assert(buffer.c[i].array);
  806. buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
  807. for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
  808. struct percpu_buffer_node *node;
  809. expected_sum += j;
  810. /*
  811. * We could theoretically put the word-sized
  812. * "data" directly in the buffer. However, we
  813. * want to model objects that would not fit
  814. * within a single word, so allocate an object
  815. * for each node.
  816. */
  817. node = malloc(sizeof(*node));
  818. assert(node);
  819. node->data = j;
  820. buffer.c[i].array[j - 1] = node;
  821. buffer.c[i].offset++;
  822. }
  823. }
  824. for (i = 0; i < num_threads; i++) {
  825. ret = pthread_create(&test_threads[i], NULL,
  826. test_percpu_buffer_thread, &buffer);
  827. if (ret) {
  828. errno = ret;
  829. perror("pthread_create");
  830. abort();
  831. }
  832. }
  833. for (i = 0; i < num_threads; i++) {
  834. ret = pthread_join(test_threads[i], NULL);
  835. if (ret) {
  836. errno = ret;
  837. perror("pthread_join");
  838. abort();
  839. }
  840. }
  841. for (i = 0; i < CPU_SETSIZE; i++) {
  842. struct percpu_buffer_node *node;
  843. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  844. continue;
  845. while ((node = __percpu_buffer_pop(&buffer, i))) {
  846. sum += node->data;
  847. free(node);
  848. }
  849. free(buffer.c[i].array);
  850. }
  851. /*
  852. * All entries should now be accounted for (unless some external
  853. * actor is interfering with our allowed affinity while this
  854. * test is running).
  855. */
  856. assert(sum == expected_sum);
  857. }
  858. bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
  859. struct percpu_memcpy_buffer_node item,
  860. int *_cpu)
  861. {
  862. bool result = false;
  863. int cpu;
  864. for (;;) {
  865. intptr_t *targetptr_final, newval_final, offset;
  866. char *destptr, *srcptr;
  867. size_t copylen;
  868. int ret;
  869. cpu = get_current_cpu_id();
  870. /* Load offset with single-copy atomicity. */
  871. offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
  872. if (offset == buffer->c[cpu].buflen)
  873. break;
  874. destptr = (char *)&buffer->c[cpu].array[offset];
  875. srcptr = (char *)&item;
  876. /* copylen must be <= 4kB. */
  877. copylen = sizeof(item);
  878. newval_final = offset + 1;
  879. targetptr_final = &buffer->c[cpu].offset;
  880. ret = rseq_cmpeqv_trymemcpy_storev(
  881. opt_mo, RSEQ_PERCPU,
  882. targetptr_final, offset,
  883. destptr, srcptr, copylen,
  884. newval_final, cpu);
  885. if (rseq_likely(!ret)) {
  886. result = true;
  887. break;
  888. }
  889. /* Retry if comparison fails or rseq aborts. */
  890. }
  891. if (_cpu)
  892. *_cpu = cpu;
  893. return result;
  894. }
  895. bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
  896. struct percpu_memcpy_buffer_node *item,
  897. int *_cpu)
  898. {
  899. bool result = false;
  900. int cpu;
  901. for (;;) {
  902. intptr_t *targetptr_final, newval_final, offset;
  903. char *destptr, *srcptr;
  904. size_t copylen;
  905. int ret;
  906. cpu = get_current_cpu_id();
  907. /* Load offset with single-copy atomicity. */
  908. offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
  909. if (offset == 0)
  910. break;
  911. destptr = (char *)item;
  912. srcptr = (char *)&buffer->c[cpu].array[offset - 1];
  913. /* copylen must be <= 4kB. */
  914. copylen = sizeof(*item);
  915. newval_final = offset - 1;
  916. targetptr_final = &buffer->c[cpu].offset;
  917. ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  918. targetptr_final, offset, destptr, srcptr, copylen,
  919. newval_final, cpu);
  920. if (rseq_likely(!ret)) {
  921. result = true;
  922. break;
  923. }
  924. /* Retry if comparison fails or rseq aborts. */
  925. }
  926. if (_cpu)
  927. *_cpu = cpu;
  928. return result;
  929. }
  930. /*
  931. * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
  932. * only be used on buffers that are not concurrently modified.
  933. */
  934. bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
  935. struct percpu_memcpy_buffer_node *item,
  936. int cpu)
  937. {
  938. intptr_t offset;
  939. offset = buffer->c[cpu].offset;
  940. if (offset == 0)
  941. return false;
  942. memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
  943. buffer->c[cpu].offset = offset - 1;
  944. return true;
  945. }
  946. void *test_percpu_memcpy_buffer_thread(void *arg)
  947. {
  948. long long i, reps;
  949. struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
  950. if (!opt_disable_rseq && rseq_register_current_thread())
  951. abort();
  952. reps = opt_reps;
  953. for (i = 0; i < reps; i++) {
  954. struct percpu_memcpy_buffer_node item;
  955. bool result;
  956. result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
  957. if (opt_yield)
  958. sched_yield(); /* encourage shuffling */
  959. if (result) {
  960. if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
  961. /* Should increase buffer size. */
  962. abort();
  963. }
  964. }
  965. }
  966. printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
  967. (int) rseq_gettid(), nr_abort, signals_delivered);
  968. if (!opt_disable_rseq && rseq_unregister_current_thread())
  969. abort();
  970. return NULL;
  971. }
  972. /* Simultaneous modification to a per-cpu buffer from many threads. */
  973. void test_percpu_memcpy_buffer(void)
  974. {
  975. const int num_threads = opt_threads;
  976. int i, j, ret;
  977. uint64_t sum = 0, expected_sum = 0;
  978. struct percpu_memcpy_buffer buffer;
  979. pthread_t test_threads[num_threads];
  980. cpu_set_t allowed_cpus;
  981. memset(&buffer, 0, sizeof(buffer));
  982. /* Generate list entries for every usable cpu. */
  983. sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
  984. for (i = 0; i < CPU_SETSIZE; i++) {
  985. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  986. continue;
  987. /* Worse-case is every item in same CPU. */
  988. buffer.c[i].array =
  989. malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
  990. MEMCPY_BUFFER_ITEM_PER_CPU);
  991. assert(buffer.c[i].array);
  992. buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
  993. for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
  994. expected_sum += 2 * j + 1;
  995. /*
  996. * We could theoretically put the word-sized
  997. * "data" directly in the buffer. However, we
  998. * want to model objects that would not fit
  999. * within a single word, so allocate an object
  1000. * for each node.
  1001. */
  1002. buffer.c[i].array[j - 1].data1 = j;
  1003. buffer.c[i].array[j - 1].data2 = j + 1;
  1004. buffer.c[i].offset++;
  1005. }
  1006. }
  1007. for (i = 0; i < num_threads; i++) {
  1008. ret = pthread_create(&test_threads[i], NULL,
  1009. test_percpu_memcpy_buffer_thread,
  1010. &buffer);
  1011. if (ret) {
  1012. errno = ret;
  1013. perror("pthread_create");
  1014. abort();
  1015. }
  1016. }
  1017. for (i = 0; i < num_threads; i++) {
  1018. ret = pthread_join(test_threads[i], NULL);
  1019. if (ret) {
  1020. errno = ret;
  1021. perror("pthread_join");
  1022. abort();
  1023. }
  1024. }
  1025. for (i = 0; i < CPU_SETSIZE; i++) {
  1026. struct percpu_memcpy_buffer_node item;
  1027. if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
  1028. continue;
  1029. while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
  1030. sum += item.data1;
  1031. sum += item.data2;
  1032. }
  1033. free(buffer.c[i].array);
  1034. }
  1035. /*
  1036. * All entries should now be accounted for (unless some external
  1037. * actor is interfering with our allowed affinity while this
  1038. * test is running).
  1039. */
  1040. assert(sum == expected_sum);
  1041. }
  1042. static void test_signal_interrupt_handler(int signo)
  1043. {
  1044. signals_delivered++;
  1045. }
  1046. static int set_signal_handler(void)
  1047. {
  1048. int ret = 0;
  1049. struct sigaction sa;
  1050. sigset_t sigset;
  1051. ret = sigemptyset(&sigset);
  1052. if (ret < 0) {
  1053. perror("sigemptyset");
  1054. return ret;
  1055. }
  1056. sa.sa_handler = test_signal_interrupt_handler;
  1057. sa.sa_mask = sigset;
  1058. sa.sa_flags = 0;
  1059. ret = sigaction(SIGUSR1, &sa, NULL);
  1060. if (ret < 0) {
  1061. perror("sigaction");
  1062. return ret;
  1063. }
  1064. printf_verbose("Signal handler set for SIGUSR1\n");
  1065. return ret;
  1066. }
  1067. /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
  1068. #ifdef TEST_MEMBARRIER
  1069. struct test_membarrier_thread_args {
  1070. int stop;
  1071. intptr_t percpu_list_ptr;
  1072. };
  1073. /* Worker threads modify data in their "active" percpu lists. */
  1074. void *test_membarrier_worker_thread(void *arg)
  1075. {
  1076. struct test_membarrier_thread_args *args =
  1077. (struct test_membarrier_thread_args *)arg;
  1078. const int iters = opt_reps;
  1079. int i;
  1080. if (rseq_register_current_thread()) {
  1081. fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
  1082. errno, strerror(errno));
  1083. abort();
  1084. }
  1085. /* Wait for initialization. */
  1086. while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
  1087. for (i = 0; i < iters; ++i) {
  1088. int ret;
  1089. do {
  1090. int cpu = get_current_cpu_id();
  1091. ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
  1092. &args->percpu_list_ptr,
  1093. sizeof(struct percpu_list_entry) * cpu, 1, cpu);
  1094. } while (rseq_unlikely(ret));
  1095. }
  1096. if (rseq_unregister_current_thread()) {
  1097. fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
  1098. errno, strerror(errno));
  1099. abort();
  1100. }
  1101. return NULL;
  1102. }
  1103. void test_membarrier_init_percpu_list(struct percpu_list *list)
  1104. {
  1105. int i;
  1106. memset(list, 0, sizeof(*list));
  1107. for (i = 0; i < CPU_SETSIZE; i++) {
  1108. struct percpu_list_node *node;
  1109. node = malloc(sizeof(*node));
  1110. assert(node);
  1111. node->data = 0;
  1112. node->next = NULL;
  1113. list->c[i].head = node;
  1114. }
  1115. }
  1116. void test_membarrier_free_percpu_list(struct percpu_list *list)
  1117. {
  1118. int i;
  1119. for (i = 0; i < CPU_SETSIZE; i++)
  1120. free(list->c[i].head);
  1121. }
  1122. /*
  1123. * The manager thread swaps per-cpu lists that worker threads see,
  1124. * and validates that there are no unexpected modifications.
  1125. */
  1126. void *test_membarrier_manager_thread(void *arg)
  1127. {
  1128. struct test_membarrier_thread_args *args =
  1129. (struct test_membarrier_thread_args *)arg;
  1130. struct percpu_list list_a, list_b;
  1131. intptr_t expect_a = 0, expect_b = 0;
  1132. int cpu_a = 0, cpu_b = 0;
  1133. if (rseq_register_current_thread()) {
  1134. fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
  1135. errno, strerror(errno));
  1136. abort();
  1137. }
  1138. /* Init lists. */
  1139. test_membarrier_init_percpu_list(&list_a);
  1140. test_membarrier_init_percpu_list(&list_b);
  1141. __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
  1142. while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
  1143. /* list_a is "active". */
  1144. cpu_a = rand() % CPU_SETSIZE;
  1145. /*
  1146. * As list_b is "inactive", we should never see changes
  1147. * to list_b.
  1148. */
  1149. if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
  1150. fprintf(stderr, "Membarrier test failed\n");
  1151. abort();
  1152. }
  1153. /* Make list_b "active". */
  1154. __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
  1155. if (rseq_membarrier_expedited(cpu_a) &&
  1156. errno != ENXIO /* missing CPU */) {
  1157. perror("sys_membarrier");
  1158. abort();
  1159. }
  1160. /*
  1161. * Cpu A should now only modify list_b, so the values
  1162. * in list_a should be stable.
  1163. */
  1164. expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
  1165. cpu_b = rand() % CPU_SETSIZE;
  1166. /*
  1167. * As list_a is "inactive", we should never see changes
  1168. * to list_a.
  1169. */
  1170. if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
  1171. fprintf(stderr, "Membarrier test failed\n");
  1172. abort();
  1173. }
  1174. /* Make list_a "active". */
  1175. __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
  1176. if (rseq_membarrier_expedited(cpu_b) &&
  1177. errno != ENXIO /* missing CPU*/) {
  1178. perror("sys_membarrier");
  1179. abort();
  1180. }
  1181. /* Remember a value from list_b. */
  1182. expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
  1183. }
  1184. test_membarrier_free_percpu_list(&list_a);
  1185. test_membarrier_free_percpu_list(&list_b);
  1186. if (rseq_unregister_current_thread()) {
  1187. fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
  1188. errno, strerror(errno));
  1189. abort();
  1190. }
  1191. return NULL;
  1192. }
  1193. void test_membarrier(void)
  1194. {
  1195. const int num_threads = opt_threads;
  1196. struct test_membarrier_thread_args thread_args;
  1197. pthread_t worker_threads[num_threads];
  1198. pthread_t manager_thread;
  1199. int i, ret;
  1200. if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
  1201. perror("sys_membarrier");
  1202. abort();
  1203. }
  1204. thread_args.stop = 0;
  1205. thread_args.percpu_list_ptr = 0;
  1206. ret = pthread_create(&manager_thread, NULL,
  1207. test_membarrier_manager_thread, &thread_args);
  1208. if (ret) {
  1209. errno = ret;
  1210. perror("pthread_create");
  1211. abort();
  1212. }
  1213. for (i = 0; i < num_threads; i++) {
  1214. ret = pthread_create(&worker_threads[i], NULL,
  1215. test_membarrier_worker_thread, &thread_args);
  1216. if (ret) {
  1217. errno = ret;
  1218. perror("pthread_create");
  1219. abort();
  1220. }
  1221. }
  1222. for (i = 0; i < num_threads; i++) {
  1223. ret = pthread_join(worker_threads[i], NULL);
  1224. if (ret) {
  1225. errno = ret;
  1226. perror("pthread_join");
  1227. abort();
  1228. }
  1229. }
  1230. __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
  1231. ret = pthread_join(manager_thread, NULL);
  1232. if (ret) {
  1233. errno = ret;
  1234. perror("pthread_join");
  1235. abort();
  1236. }
  1237. }
  1238. #else /* TEST_MEMBARRIER */
  1239. void test_membarrier(void)
  1240. {
  1241. fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
  1242. "Skipping membarrier test.\n");
  1243. }
  1244. #endif
  1245. static void show_usage(int argc, char **argv)
  1246. {
  1247. printf("Usage : %s <OPTIONS>\n",
  1248. argv[0]);
  1249. printf("OPTIONS:\n");
  1250. printf(" [-1 loops] Number of loops for delay injection 1\n");
  1251. printf(" [-2 loops] Number of loops for delay injection 2\n");
  1252. printf(" [-3 loops] Number of loops for delay injection 3\n");
  1253. printf(" [-4 loops] Number of loops for delay injection 4\n");
  1254. printf(" [-5 loops] Number of loops for delay injection 5\n");
  1255. printf(" [-6 loops] Number of loops for delay injection 6\n");
  1256. printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
  1257. printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
  1258. printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
  1259. printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
  1260. printf(" [-y] Yield\n");
  1261. printf(" [-k] Kill thread with signal\n");
  1262. printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
  1263. printf(" [-t N] Number of threads (default 200)\n");
  1264. printf(" [-r N] Number of repetitions per thread (default 5000)\n");
  1265. printf(" [-d] Disable rseq system call (no initialization)\n");
  1266. printf(" [-D M] Disable rseq for each M threads\n");
  1267. printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
  1268. printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
  1269. printf(" [-v] Verbose output.\n");
  1270. printf(" [-h] Show this help.\n");
  1271. printf("\n");
  1272. }
  1273. int main(int argc, char **argv)
  1274. {
  1275. int i;
  1276. for (i = 1; i < argc; i++) {
  1277. if (argv[i][0] != '-')
  1278. continue;
  1279. switch (argv[i][1]) {
  1280. case '1':
  1281. case '2':
  1282. case '3':
  1283. case '4':
  1284. case '5':
  1285. case '6':
  1286. case '7':
  1287. case '8':
  1288. case '9':
  1289. if (argc < i + 2) {
  1290. show_usage(argc, argv);
  1291. goto error;
  1292. }
  1293. loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
  1294. i++;
  1295. break;
  1296. case 'm':
  1297. if (argc < i + 2) {
  1298. show_usage(argc, argv);
  1299. goto error;
  1300. }
  1301. opt_modulo = atol(argv[i + 1]);
  1302. if (opt_modulo < 0) {
  1303. show_usage(argc, argv);
  1304. goto error;
  1305. }
  1306. i++;
  1307. break;
  1308. case 's':
  1309. if (argc < i + 2) {
  1310. show_usage(argc, argv);
  1311. goto error;
  1312. }
  1313. opt_sleep = atol(argv[i + 1]);
  1314. if (opt_sleep < 0) {
  1315. show_usage(argc, argv);
  1316. goto error;
  1317. }
  1318. i++;
  1319. break;
  1320. case 'y':
  1321. opt_yield = 1;
  1322. break;
  1323. case 'k':
  1324. opt_signal = 1;
  1325. break;
  1326. case 'd':
  1327. opt_disable_rseq = 1;
  1328. break;
  1329. case 'D':
  1330. if (argc < i + 2) {
  1331. show_usage(argc, argv);
  1332. goto error;
  1333. }
  1334. opt_disable_mod = atol(argv[i + 1]);
  1335. if (opt_disable_mod < 0) {
  1336. show_usage(argc, argv);
  1337. goto error;
  1338. }
  1339. i++;
  1340. break;
  1341. case 't':
  1342. if (argc < i + 2) {
  1343. show_usage(argc, argv);
  1344. goto error;
  1345. }
  1346. opt_threads = atol(argv[i + 1]);
  1347. if (opt_threads < 0) {
  1348. show_usage(argc, argv);
  1349. goto error;
  1350. }
  1351. i++;
  1352. break;
  1353. case 'r':
  1354. if (argc < i + 2) {
  1355. show_usage(argc, argv);
  1356. goto error;
  1357. }
  1358. opt_reps = atoll(argv[i + 1]);
  1359. if (opt_reps < 0) {
  1360. show_usage(argc, argv);
  1361. goto error;
  1362. }
  1363. i++;
  1364. break;
  1365. case 'h':
  1366. show_usage(argc, argv);
  1367. goto end;
  1368. case 'T':
  1369. if (argc < i + 2) {
  1370. show_usage(argc, argv);
  1371. goto error;
  1372. }
  1373. opt_test = *argv[i + 1];
  1374. switch (opt_test) {
  1375. case 's':
  1376. case 'l':
  1377. case 'i':
  1378. case 'b':
  1379. case 'm':
  1380. case 'r':
  1381. break;
  1382. default:
  1383. show_usage(argc, argv);
  1384. goto error;
  1385. }
  1386. i++;
  1387. break;
  1388. case 'v':
  1389. verbose = 1;
  1390. break;
  1391. case 'M':
  1392. opt_mo = RSEQ_MO_RELEASE;
  1393. break;
  1394. default:
  1395. show_usage(argc, argv);
  1396. goto error;
  1397. }
  1398. }
  1399. loop_cnt_1 = loop_cnt[1];
  1400. loop_cnt_2 = loop_cnt[2];
  1401. loop_cnt_3 = loop_cnt[3];
  1402. loop_cnt_4 = loop_cnt[4];
  1403. loop_cnt_5 = loop_cnt[5];
  1404. loop_cnt_6 = loop_cnt[6];
  1405. if (set_signal_handler())
  1406. goto error;
  1407. if (!opt_disable_rseq && rseq_register_current_thread())
  1408. goto error;
  1409. if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
  1410. fprintf(stderr, "Error: cpu id getter unavailable\n");
  1411. goto error;
  1412. }
  1413. switch (opt_test) {
  1414. case 's':
  1415. printf_verbose("spinlock\n");
  1416. test_percpu_spinlock();
  1417. break;
  1418. case 'l':
  1419. printf_verbose("linked list\n");
  1420. test_percpu_list();
  1421. break;
  1422. case 'b':
  1423. printf_verbose("buffer\n");
  1424. test_percpu_buffer();
  1425. break;
  1426. case 'm':
  1427. printf_verbose("memcpy buffer\n");
  1428. test_percpu_memcpy_buffer();
  1429. break;
  1430. case 'i':
  1431. printf_verbose("counter increment\n");
  1432. test_percpu_inc();
  1433. break;
  1434. case 'r':
  1435. printf_verbose("membarrier\n");
  1436. test_membarrier();
  1437. break;
  1438. }
  1439. if (!opt_disable_rseq && rseq_unregister_current_thread())
  1440. abort();
  1441. end:
  1442. return 0;
  1443. error:
  1444. return -1;
  1445. }