test_lru_dist.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2016 Facebook
  4. */
  5. #define _GNU_SOURCE
#include <linux/types.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>
#include <bpf/bpf.h>
#include "bpf_util.h"
  21. #define min(a, b) ((a) < (b) ? (a) : (b))
  22. #ifndef offsetof
  23. # define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
  24. #endif
  25. #define container_of(ptr, type, member) ({ \
  26. const typeof( ((type *)0)->member ) *__mptr = (ptr); \
  27. (type *)( (char *)__mptr - offsetof(type,member) );})
  28. static int nr_cpus;
  29. static unsigned long long *dist_keys;
  30. static unsigned int dist_key_counts;
  31. struct list_head {
  32. struct list_head *next, *prev;
  33. };
  34. static inline void INIT_LIST_HEAD(struct list_head *list)
  35. {
  36. list->next = list;
  37. list->prev = list;
  38. }
  39. static inline void __list_add(struct list_head *new,
  40. struct list_head *prev,
  41. struct list_head *next)
  42. {
  43. next->prev = new;
  44. new->next = next;
  45. new->prev = prev;
  46. prev->next = new;
  47. }
  48. static inline void list_add(struct list_head *new, struct list_head *head)
  49. {
  50. __list_add(new, head, head->next);
  51. }
  52. static inline void __list_del(struct list_head *prev, struct list_head *next)
  53. {
  54. next->prev = prev;
  55. prev->next = next;
  56. }
  57. static inline void __list_del_entry(struct list_head *entry)
  58. {
  59. __list_del(entry->prev, entry->next);
  60. }
  61. static inline void list_move(struct list_head *list, struct list_head *head)
  62. {
  63. __list_del_entry(list);
  64. list_add(list, head);
  65. }
  66. #define list_entry(ptr, type, member) \
  67. container_of(ptr, type, member)
  68. #define list_last_entry(ptr, type, member) \
  69. list_entry((ptr)->prev, type, member)
  70. struct pfect_lru_node {
  71. struct list_head list;
  72. unsigned long long key;
  73. };
  74. struct pfect_lru {
  75. struct list_head list;
  76. struct pfect_lru_node *free_nodes;
  77. unsigned int cur_size;
  78. unsigned int lru_size;
  79. unsigned int nr_unique;
  80. unsigned int nr_misses;
  81. unsigned int total;
  82. int map_fd;
  83. };
  84. static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
  85. unsigned int nr_possible_elems)
  86. {
  87. lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
  88. sizeof(unsigned long long),
  89. sizeof(struct pfect_lru_node *),
  90. nr_possible_elems, NULL);
  91. assert(lru->map_fd != -1);
  92. lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
  93. assert(lru->free_nodes);
  94. INIT_LIST_HEAD(&lru->list);
  95. lru->cur_size = 0;
  96. lru->lru_size = lru_size;
  97. lru->nr_unique = lru->nr_misses = lru->total = 0;
  98. }
  99. static void pfect_lru_destroy(struct pfect_lru *lru)
  100. {
  101. close(lru->map_fd);
  102. free(lru->free_nodes);
  103. }
  104. static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
  105. unsigned long long key)
  106. {
  107. struct pfect_lru_node *node = NULL;
  108. int seen = 0;
  109. lru->total++;
  110. if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
  111. if (node) {
  112. list_move(&node->list, &lru->list);
  113. return 1;
  114. }
  115. seen = 1;
  116. }
  117. if (lru->cur_size < lru->lru_size) {
  118. node = &lru->free_nodes[lru->cur_size++];
  119. INIT_LIST_HEAD(&node->list);
  120. } else {
  121. struct pfect_lru_node *null_node = NULL;
  122. node = list_last_entry(&lru->list,
  123. struct pfect_lru_node,
  124. list);
  125. bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
  126. }
  127. node->key = key;
  128. list_move(&node->list, &lru->list);
  129. lru->nr_misses++;
  130. if (seen) {
  131. assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
  132. } else {
  133. lru->nr_unique++;
  134. assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
  135. }
  136. return seen;
  137. }
  138. static unsigned int read_keys(const char *dist_file,
  139. unsigned long long **keys)
  140. {
  141. struct stat fst;
  142. unsigned long long *retkeys;
  143. unsigned int counts = 0;
  144. int dist_fd;
  145. char *b, *l;
  146. int i;
  147. dist_fd = open(dist_file, 0);
  148. assert(dist_fd != -1);
  149. assert(fstat(dist_fd, &fst) == 0);
  150. b = malloc(fst.st_size);
  151. assert(b);
  152. assert(read(dist_fd, b, fst.st_size) == fst.st_size);
  153. close(dist_fd);
  154. for (i = 0; i < fst.st_size; i++) {
  155. if (b[i] == '\n')
  156. counts++;
  157. }
  158. counts++; /* in case the last line has no \n */
  159. retkeys = malloc(counts * sizeof(unsigned long long));
  160. assert(retkeys);
  161. counts = 0;
  162. for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
  163. retkeys[counts++] = strtoull(l, NULL, 10);
  164. free(b);
  165. *keys = retkeys;
  166. return counts;
  167. }
  168. static int create_map(int map_type, int map_flags, unsigned int size)
  169. {
  170. LIBBPF_OPTS(bpf_map_create_opts, opts,
  171. .map_flags = map_flags,
  172. );
  173. int map_fd;
  174. map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
  175. sizeof(unsigned long long), size, &opts);
  176. if (map_fd == -1)
  177. perror("bpf_create_map");
  178. return map_fd;
  179. }
  180. static int sched_next_online(int pid, int next_to_try)
  181. {
  182. cpu_set_t cpuset;
  183. if (next_to_try == nr_cpus)
  184. return -1;
  185. while (next_to_try < nr_cpus) {
  186. CPU_ZERO(&cpuset);
  187. CPU_SET(next_to_try++, &cpuset);
  188. if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
  189. break;
  190. }
  191. return next_to_try;
  192. }
  193. static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
  194. void *data)
  195. {
  196. int next_sched_cpu = 0;
  197. pid_t pid[tasks];
  198. int i;
  199. for (i = 0; i < tasks; i++) {
  200. pid[i] = fork();
  201. if (pid[i] == 0) {
  202. next_sched_cpu = sched_next_online(0, next_sched_cpu);
  203. fn(i, data);
  204. exit(0);
  205. } else if (pid[i] == -1) {
  206. printf("couldn't spawn #%d process\n", i);
  207. exit(1);
  208. }
  209. /* It is mostly redundant and just allow the parent
  210. * process to update next_shced_cpu for the next child
  211. * process
  212. */
  213. next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
  214. }
  215. for (i = 0; i < tasks; i++) {
  216. int status;
  217. assert(waitpid(pid[i], &status, 0) == pid[i]);
  218. assert(status == 0);
  219. }
  220. }
  221. static void do_test_lru_dist(int task, void *data)
  222. {
  223. unsigned int nr_misses = 0;
  224. struct pfect_lru pfect_lru;
  225. unsigned long long key, value = 1234;
  226. unsigned int i;
  227. unsigned int lru_map_fd = ((unsigned int *)data)[0];
  228. unsigned int lru_size = ((unsigned int *)data)[1];
  229. unsigned long long key_offset = task * dist_key_counts;
  230. pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
  231. for (i = 0; i < dist_key_counts; i++) {
  232. key = dist_keys[i] + key_offset;
  233. pfect_lru_lookup_or_insert(&pfect_lru, key);
  234. if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
  235. continue;
  236. if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
  237. printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
  238. key, errno);
  239. assert(0);
  240. }
  241. nr_misses++;
  242. }
  243. printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
  244. task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
  245. dist_key_counts);
  246. printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
  247. task, pfect_lru.nr_unique, pfect_lru.total,
  248. pfect_lru.nr_misses, pfect_lru.total);
  249. pfect_lru_destroy(&pfect_lru);
  250. close(lru_map_fd);
  251. }
  252. static void test_parallel_lru_dist(int map_type, int map_flags,
  253. int nr_tasks, unsigned int lru_size)
  254. {
  255. int child_data[2];
  256. int lru_map_fd;
  257. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  258. map_flags);
  259. if (map_flags & BPF_F_NO_COMMON_LRU)
  260. lru_map_fd = create_map(map_type, map_flags,
  261. nr_cpus * lru_size);
  262. else
  263. lru_map_fd = create_map(map_type, map_flags,
  264. nr_tasks * lru_size);
  265. assert(lru_map_fd != -1);
  266. child_data[0] = lru_map_fd;
  267. child_data[1] = lru_size;
  268. run_parallel(nr_tasks, do_test_lru_dist, child_data);
  269. close(lru_map_fd);
  270. }
  271. static void test_lru_loss0(int map_type, int map_flags)
  272. {
  273. unsigned long long key, value[nr_cpus];
  274. unsigned int old_unused_losses = 0;
  275. unsigned int new_unused_losses = 0;
  276. unsigned int used_losses = 0;
  277. int map_fd;
  278. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  279. map_flags);
  280. assert(sched_next_online(0, 0) != -1);
  281. if (map_flags & BPF_F_NO_COMMON_LRU)
  282. map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
  283. else
  284. map_fd = create_map(map_type, map_flags, 900);
  285. assert(map_fd != -1);
  286. value[0] = 1234;
  287. for (key = 1; key <= 1000; key++) {
  288. int start_key, end_key;
  289. assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
  290. start_key = 101;
  291. end_key = min(key, 900);
  292. while (start_key <= end_key) {
  293. bpf_map_lookup_elem(map_fd, &start_key, value);
  294. start_key++;
  295. }
  296. }
  297. for (key = 1; key <= 1000; key++) {
  298. if (bpf_map_lookup_elem(map_fd, &key, value)) {
  299. if (key <= 100)
  300. old_unused_losses++;
  301. else if (key <= 900)
  302. used_losses++;
  303. else
  304. new_unused_losses++;
  305. }
  306. }
  307. close(map_fd);
  308. printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
  309. "newer-elem-losses:%d(/100)\n",
  310. old_unused_losses, used_losses, new_unused_losses);
  311. }
  312. static void test_lru_loss1(int map_type, int map_flags)
  313. {
  314. unsigned long long key, value[nr_cpus];
  315. int map_fd;
  316. unsigned int nr_losses = 0;
  317. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  318. map_flags);
  319. assert(sched_next_online(0, 0) != -1);
  320. if (map_flags & BPF_F_NO_COMMON_LRU)
  321. map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
  322. else
  323. map_fd = create_map(map_type, map_flags, 1000);
  324. assert(map_fd != -1);
  325. value[0] = 1234;
  326. for (key = 1; key <= 1000; key++)
  327. assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
  328. for (key = 1; key <= 1000; key++) {
  329. if (bpf_map_lookup_elem(map_fd, &key, value))
  330. nr_losses++;
  331. }
  332. close(map_fd);
  333. printf("nr_losses:%d(/1000)\n", nr_losses);
  334. }
  335. static void do_test_parallel_lru_loss(int task, void *data)
  336. {
  337. const unsigned int nr_stable_elems = 1000;
  338. const unsigned int nr_repeats = 100000;
  339. int map_fd = *(int *)data;
  340. unsigned long long stable_base;
  341. unsigned long long key, value[nr_cpus];
  342. unsigned long long next_ins_key;
  343. unsigned int nr_losses = 0;
  344. unsigned int i;
  345. stable_base = task * nr_repeats * 2 + 1;
  346. next_ins_key = stable_base;
  347. value[0] = 1234;
  348. for (i = 0; i < nr_stable_elems; i++) {
  349. assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
  350. BPF_NOEXIST) == 0);
  351. next_ins_key++;
  352. }
  353. for (i = 0; i < nr_repeats; i++) {
  354. int rn;
  355. rn = rand();
  356. if (rn % 10) {
  357. key = rn % nr_stable_elems + stable_base;
  358. bpf_map_lookup_elem(map_fd, &key, value);
  359. } else {
  360. bpf_map_update_elem(map_fd, &next_ins_key, value,
  361. BPF_NOEXIST);
  362. next_ins_key++;
  363. }
  364. }
  365. key = stable_base;
  366. for (i = 0; i < nr_stable_elems; i++) {
  367. if (bpf_map_lookup_elem(map_fd, &key, value))
  368. nr_losses++;
  369. key++;
  370. }
  371. printf(" task:%d nr_losses:%u\n", task, nr_losses);
  372. }
  373. static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
  374. {
  375. int map_fd;
  376. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  377. map_flags);
  378. /* Give 20% more than the active working set */
  379. if (map_flags & BPF_F_NO_COMMON_LRU)
  380. map_fd = create_map(map_type, map_flags,
  381. nr_cpus * (1000 + 200));
  382. else
  383. map_fd = create_map(map_type, map_flags,
  384. nr_tasks * (1000 + 200));
  385. assert(map_fd != -1);
  386. run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
  387. close(map_fd);
  388. }
  389. int main(int argc, char **argv)
  390. {
  391. int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
  392. const char *dist_file;
  393. int nr_tasks = 1;
  394. int lru_size;
  395. int f;
  396. if (argc < 4) {
  397. printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
  398. argv[0]);
  399. return -1;
  400. }
  401. dist_file = argv[1];
  402. lru_size = atoi(argv[2]);
  403. nr_tasks = atoi(argv[3]);
  404. setbuf(stdout, NULL);
  405. srand(time(NULL));
  406. nr_cpus = bpf_num_possible_cpus();
  407. assert(nr_cpus != -1);
  408. printf("nr_cpus:%d\n\n", nr_cpus);
  409. nr_tasks = min(nr_tasks, nr_cpus);
  410. dist_key_counts = read_keys(dist_file, &dist_keys);
  411. if (!dist_key_counts) {
  412. printf("%s has no key\n", dist_file);
  413. return -1;
  414. }
  415. for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
  416. test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  417. test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  418. test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  419. nr_tasks);
  420. test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  421. nr_tasks, lru_size);
  422. printf("\n");
  423. }
  424. free(dist_keys);
  425. return 0;
  426. }