list_lru.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
  4. * Authors: David Chinner and Glauber Costa
  5. *
  6. * Generic LRU infrastructure
  7. */
  8. #include <linux/kernel.h>
  9. #include <linux/module.h>
  10. #include <linux/mm.h>
  11. #include <linux/list_lru.h>
  12. #include <linux/slab.h>
  13. #include <linux/mutex.h>
  14. #include <linux/memcontrol.h>
  15. #include "slab.h"
  16. #include "internal.h"
  17. #ifdef CONFIG_MEMCG
  18. static LIST_HEAD(memcg_list_lrus);
  19. static DEFINE_MUTEX(list_lrus_mutex);
  20. static inline bool list_lru_memcg_aware(struct list_lru *lru)
  21. {
  22. return lru->memcg_aware;
  23. }
  24. static void list_lru_register(struct list_lru *lru)
  25. {
  26. if (!list_lru_memcg_aware(lru))
  27. return;
  28. mutex_lock(&list_lrus_mutex);
  29. list_add(&lru->list, &memcg_list_lrus);
  30. mutex_unlock(&list_lrus_mutex);
  31. }
  32. static void list_lru_unregister(struct list_lru *lru)
  33. {
  34. if (!list_lru_memcg_aware(lru))
  35. return;
  36. mutex_lock(&list_lrus_mutex);
  37. list_del(&lru->list);
  38. mutex_unlock(&list_lrus_mutex);
  39. }
  40. static int lru_shrinker_id(struct list_lru *lru)
  41. {
  42. return lru->shrinker_id;
  43. }
  44. static inline struct list_lru_one *
  45. list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
  46. {
  47. if (list_lru_memcg_aware(lru) && idx >= 0) {
  48. struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);
  49. return mlru ? &mlru->node[nid] : NULL;
  50. }
  51. return &lru->node[nid].lru;
  52. }
  53. static inline bool lock_list_lru(struct list_lru_one *l, bool irq)
  54. {
  55. if (irq)
  56. spin_lock_irq(&l->lock);
  57. else
  58. spin_lock(&l->lock);
  59. if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) {
  60. if (irq)
  61. spin_unlock_irq(&l->lock);
  62. else
  63. spin_unlock(&l->lock);
  64. return false;
  65. }
  66. return true;
  67. }
  68. static inline struct list_lru_one *
  69. lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
  70. bool irq, bool skip_empty)
  71. {
  72. struct list_lru_one *l;
  73. rcu_read_lock();
  74. again:
  75. l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
  76. if (likely(l) && lock_list_lru(l, irq)) {
  77. rcu_read_unlock();
  78. return l;
  79. }
  80. /*
  81. * Caller may simply bail out if raced with reparenting or
  82. * may iterate through the list_lru and expect empty slots.
  83. */
  84. if (skip_empty) {
  85. rcu_read_unlock();
  86. return NULL;
  87. }
  88. VM_WARN_ON(!css_is_dying(&memcg->css));
  89. memcg = parent_mem_cgroup(memcg);
  90. goto again;
  91. }
  92. static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
  93. {
  94. if (irq_off)
  95. spin_unlock_irq(&l->lock);
  96. else
  97. spin_unlock(&l->lock);
  98. }
  99. #else
  100. static void list_lru_register(struct list_lru *lru)
  101. {
  102. }
  103. static void list_lru_unregister(struct list_lru *lru)
  104. {
  105. }
  106. static int lru_shrinker_id(struct list_lru *lru)
  107. {
  108. return -1;
  109. }
  110. static inline bool list_lru_memcg_aware(struct list_lru *lru)
  111. {
  112. return false;
  113. }
  114. static inline struct list_lru_one *
  115. list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
  116. {
  117. return &lru->node[nid].lru;
  118. }
  119. static inline struct list_lru_one *
  120. lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
  121. bool irq, bool skip_empty)
  122. {
  123. struct list_lru_one *l = &lru->node[nid].lru;
  124. if (irq)
  125. spin_lock_irq(&l->lock);
  126. else
  127. spin_lock(&l->lock);
  128. return l;
  129. }
  130. static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
  131. {
  132. if (irq_off)
  133. spin_unlock_irq(&l->lock);
  134. else
  135. spin_unlock(&l->lock);
  136. }
  137. #endif /* CONFIG_MEMCG */
  138. /* The caller must ensure the memcg lifetime. */
  139. bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
  140. struct mem_cgroup *memcg)
  141. {
  142. struct list_lru_node *nlru = &lru->node[nid];
  143. struct list_lru_one *l;
  144. l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
  145. if (!l)
  146. return false;
  147. if (list_empty(item)) {
  148. list_add_tail(item, &l->list);
  149. /* Set shrinker bit if the first element was added */
  150. if (!l->nr_items++)
  151. set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
  152. unlock_list_lru(l, false);
  153. atomic_long_inc(&nlru->nr_items);
  154. return true;
  155. }
  156. unlock_list_lru(l, false);
  157. return false;
  158. }
  159. bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
  160. {
  161. bool ret;
  162. int nid = page_to_nid(virt_to_page(item));
  163. if (list_lru_memcg_aware(lru)) {
  164. rcu_read_lock();
  165. ret = list_lru_add(lru, item, nid, mem_cgroup_from_virt(item));
  166. rcu_read_unlock();
  167. } else {
  168. ret = list_lru_add(lru, item, nid, NULL);
  169. }
  170. return ret;
  171. }
  172. EXPORT_SYMBOL_GPL(list_lru_add_obj);
  173. /* The caller must ensure the memcg lifetime. */
  174. bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
  175. struct mem_cgroup *memcg)
  176. {
  177. struct list_lru_node *nlru = &lru->node[nid];
  178. struct list_lru_one *l;
  179. l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
  180. if (!l)
  181. return false;
  182. if (!list_empty(item)) {
  183. list_del_init(item);
  184. l->nr_items--;
  185. unlock_list_lru(l, false);
  186. atomic_long_dec(&nlru->nr_items);
  187. return true;
  188. }
  189. unlock_list_lru(l, false);
  190. return false;
  191. }
  192. bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
  193. {
  194. bool ret;
  195. int nid = page_to_nid(virt_to_page(item));
  196. if (list_lru_memcg_aware(lru)) {
  197. rcu_read_lock();
  198. ret = list_lru_del(lru, item, nid, mem_cgroup_from_virt(item));
  199. rcu_read_unlock();
  200. } else {
  201. ret = list_lru_del(lru, item, nid, NULL);
  202. }
  203. return ret;
  204. }
  205. EXPORT_SYMBOL_GPL(list_lru_del_obj);
  206. void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
  207. {
  208. list_del_init(item);
  209. list->nr_items--;
  210. }
  211. EXPORT_SYMBOL_GPL(list_lru_isolate);
  212. void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
  213. struct list_head *head)
  214. {
  215. list_move(item, head);
  216. list->nr_items--;
  217. }
  218. EXPORT_SYMBOL_GPL(list_lru_isolate_move);
  219. unsigned long list_lru_count_one(struct list_lru *lru,
  220. int nid, struct mem_cgroup *memcg)
  221. {
  222. struct list_lru_one *l;
  223. long count;
  224. rcu_read_lock();
  225. l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
  226. count = l ? READ_ONCE(l->nr_items) : 0;
  227. rcu_read_unlock();
  228. if (unlikely(count < 0))
  229. count = 0;
  230. return count;
  231. }
  232. EXPORT_SYMBOL_GPL(list_lru_count_one);
  233. unsigned long list_lru_count_node(struct list_lru *lru, int nid)
  234. {
  235. struct list_lru_node *nlru;
  236. nlru = &lru->node[nid];
  237. return atomic_long_read(&nlru->nr_items);
  238. }
  239. EXPORT_SYMBOL_GPL(list_lru_count_node);
  240. static unsigned long
  241. __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
  242. list_lru_walk_cb isolate, void *cb_arg,
  243. unsigned long *nr_to_walk, bool irq_off)
  244. {
  245. struct list_lru_node *nlru = &lru->node[nid];
  246. struct list_lru_one *l = NULL;
  247. struct list_head *item, *n;
  248. unsigned long isolated = 0;
  249. restart:
  250. l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
  251. if (!l)
  252. return isolated;
  253. list_for_each_safe(item, n, &l->list) {
  254. enum lru_status ret;
  255. /*
  256. * decrement nr_to_walk first so that we don't livelock if we
  257. * get stuck on large numbers of LRU_RETRY items
  258. */
  259. if (!*nr_to_walk)
  260. break;
  261. --*nr_to_walk;
  262. ret = isolate(item, l, cb_arg);
  263. switch (ret) {
  264. /*
  265. * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
  266. * lock. List traversal will have to restart from scratch.
  267. */
  268. case LRU_RETRY:
  269. goto restart;
  270. case LRU_REMOVED_RETRY:
  271. fallthrough;
  272. case LRU_REMOVED:
  273. isolated++;
  274. atomic_long_dec(&nlru->nr_items);
  275. if (ret == LRU_REMOVED_RETRY)
  276. goto restart;
  277. break;
  278. case LRU_ROTATE:
  279. list_move_tail(item, &l->list);
  280. break;
  281. case LRU_SKIP:
  282. break;
  283. case LRU_STOP:
  284. goto out;
  285. default:
  286. BUG();
  287. }
  288. }
  289. unlock_list_lru(l, irq_off);
  290. out:
  291. return isolated;
  292. }
  293. unsigned long
  294. list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
  295. list_lru_walk_cb isolate, void *cb_arg,
  296. unsigned long *nr_to_walk)
  297. {
  298. return __list_lru_walk_one(lru, nid, memcg, isolate,
  299. cb_arg, nr_to_walk, false);
  300. }
  301. EXPORT_SYMBOL_GPL(list_lru_walk_one);
  302. unsigned long
  303. list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
  304. list_lru_walk_cb isolate, void *cb_arg,
  305. unsigned long *nr_to_walk)
  306. {
  307. return __list_lru_walk_one(lru, nid, memcg, isolate,
  308. cb_arg, nr_to_walk, true);
  309. }
  310. unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
  311. list_lru_walk_cb isolate, void *cb_arg,
  312. unsigned long *nr_to_walk)
  313. {
  314. long isolated = 0;
  315. isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
  316. nr_to_walk);
  317. #ifdef CONFIG_MEMCG
  318. if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
  319. struct list_lru_memcg *mlru;
  320. struct mem_cgroup *memcg;
  321. unsigned long index;
  322. xa_for_each(&lru->xa, index, mlru) {
  323. rcu_read_lock();
  324. memcg = mem_cgroup_from_private_id(index);
  325. if (!mem_cgroup_tryget(memcg)) {
  326. rcu_read_unlock();
  327. continue;
  328. }
  329. rcu_read_unlock();
  330. isolated += __list_lru_walk_one(lru, nid, memcg,
  331. isolate, cb_arg,
  332. nr_to_walk, false);
  333. mem_cgroup_put(memcg);
  334. if (*nr_to_walk <= 0)
  335. break;
  336. }
  337. }
  338. #endif
  339. return isolated;
  340. }
  341. EXPORT_SYMBOL_GPL(list_lru_walk_node);
  342. static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
  343. {
  344. INIT_LIST_HEAD(&l->list);
  345. spin_lock_init(&l->lock);
  346. l->nr_items = 0;
  347. #ifdef CONFIG_LOCKDEP
  348. if (lru->key)
  349. lockdep_set_class(&l->lock, lru->key);
  350. #endif
  351. }
  352. #ifdef CONFIG_MEMCG
  353. static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
  354. {
  355. int nid;
  356. struct list_lru_memcg *mlru;
  357. mlru = kmalloc_flex(*mlru, node, nr_node_ids, gfp);
  358. if (!mlru)
  359. return NULL;
  360. for_each_node(nid)
  361. init_one_lru(lru, &mlru->node[nid]);
  362. return mlru;
  363. }
  364. static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
  365. {
  366. if (memcg_aware)
  367. xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
  368. lru->memcg_aware = memcg_aware;
  369. }
  370. static void memcg_destroy_list_lru(struct list_lru *lru)
  371. {
  372. XA_STATE(xas, &lru->xa, 0);
  373. struct list_lru_memcg *mlru;
  374. if (!list_lru_memcg_aware(lru))
  375. return;
  376. xas_lock_irq(&xas);
  377. xas_for_each(&xas, mlru, ULONG_MAX) {
  378. kfree(mlru);
  379. xas_store(&xas, NULL);
  380. }
  381. xas_unlock_irq(&xas);
  382. }
  383. static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
  384. struct list_lru_one *src,
  385. struct mem_cgroup *dst_memcg)
  386. {
  387. int dst_idx = dst_memcg->kmemcg_id;
  388. struct list_lru_one *dst;
  389. spin_lock_irq(&src->lock);
  390. dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
  391. spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
  392. list_splice_init(&src->list, &dst->list);
  393. if (src->nr_items) {
  394. WARN_ON(src->nr_items < 0);
  395. dst->nr_items += src->nr_items;
  396. set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
  397. }
  398. /* Mark the list_lru_one dead */
  399. src->nr_items = LONG_MIN;
  400. spin_unlock(&dst->lock);
  401. spin_unlock_irq(&src->lock);
  402. }
  403. void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
  404. {
  405. struct list_lru *lru;
  406. int i;
  407. mutex_lock(&list_lrus_mutex);
  408. list_for_each_entry(lru, &memcg_list_lrus, list) {
  409. struct list_lru_memcg *mlru;
  410. XA_STATE(xas, &lru->xa, memcg->kmemcg_id);
  411. /*
  412. * Lock the Xarray to ensure no on going list_lru_memcg
  413. * allocation and further allocation will see css_is_dying().
  414. */
  415. xas_lock_irq(&xas);
  416. mlru = xas_store(&xas, NULL);
  417. xas_unlock_irq(&xas);
  418. if (!mlru)
  419. continue;
  420. /*
  421. * With Xarray value set to NULL, holding the lru lock below
  422. * prevents list_lru_{add,del,isolate} from touching the lru,
  423. * safe to reparent.
  424. */
  425. for_each_node(i)
  426. memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);
  427. /*
  428. * Here all list_lrus corresponding to the cgroup are guaranteed
  429. * to remain empty, we can safely free this lru, any further
  430. * memcg_list_lru_alloc() call will simply bail out.
  431. */
  432. kvfree_rcu(mlru, rcu);
  433. }
  434. mutex_unlock(&list_lrus_mutex);
  435. }
  436. static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
  437. struct list_lru *lru)
  438. {
  439. int idx = memcg->kmemcg_id;
  440. return idx < 0 || xa_load(&lru->xa, idx);
  441. }
  442. int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
  443. gfp_t gfp)
  444. {
  445. unsigned long flags;
  446. struct list_lru_memcg *mlru = NULL;
  447. struct mem_cgroup *pos, *parent;
  448. XA_STATE(xas, &lru->xa, 0);
  449. if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
  450. return 0;
  451. gfp &= GFP_RECLAIM_MASK;
  452. /*
  453. * Because the list_lru can be reparented to the parent cgroup's
  454. * list_lru, we should make sure that this cgroup and all its
  455. * ancestors have allocated list_lru_memcg.
  456. */
  457. do {
  458. /*
  459. * Keep finding the farest parent that wasn't populated
  460. * until found memcg itself.
  461. */
  462. pos = memcg;
  463. parent = parent_mem_cgroup(pos);
  464. while (!memcg_list_lru_allocated(parent, lru)) {
  465. pos = parent;
  466. parent = parent_mem_cgroup(pos);
  467. }
  468. if (!mlru) {
  469. mlru = memcg_init_list_lru_one(lru, gfp);
  470. if (!mlru)
  471. return -ENOMEM;
  472. }
  473. xas_set(&xas, pos->kmemcg_id);
  474. do {
  475. xas_lock_irqsave(&xas, flags);
  476. if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
  477. xas_store(&xas, mlru);
  478. if (!xas_error(&xas))
  479. mlru = NULL;
  480. }
  481. xas_unlock_irqrestore(&xas, flags);
  482. } while (xas_nomem(&xas, gfp));
  483. } while (pos != memcg && !css_is_dying(&pos->css));
  484. if (unlikely(mlru))
  485. kfree(mlru);
  486. return xas_error(&xas);
  487. }
  488. #else
  489. static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
  490. {
  491. }
  492. static void memcg_destroy_list_lru(struct list_lru *lru)
  493. {
  494. }
  495. #endif /* CONFIG_MEMCG */
  496. int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
  497. {
  498. int i;
  499. #ifdef CONFIG_MEMCG
  500. if (shrinker)
  501. lru->shrinker_id = shrinker->id;
  502. else
  503. lru->shrinker_id = -1;
  504. if (mem_cgroup_kmem_disabled())
  505. memcg_aware = false;
  506. #endif
  507. lru->node = kzalloc_objs(*lru->node, nr_node_ids);
  508. if (!lru->node)
  509. return -ENOMEM;
  510. for_each_node(i)
  511. init_one_lru(lru, &lru->node[i].lru);
  512. memcg_init_list_lru(lru, memcg_aware);
  513. list_lru_register(lru);
  514. return 0;
  515. }
  516. EXPORT_SYMBOL_GPL(__list_lru_init);
  517. void list_lru_destroy(struct list_lru *lru)
  518. {
  519. /* Already destroyed or not yet initialized? */
  520. if (!lru->node)
  521. return;
  522. list_lru_unregister(lru);
  523. memcg_destroy_list_lru(lru);
  524. kfree(lru->node);
  525. lru->node = NULL;
  526. #ifdef CONFIG_MEMCG
  527. lru->shrinker_id = -1;
  528. #endif
  529. }
  530. EXPORT_SYMBOL_GPL(list_lru_destroy);