radix-tree.c 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (C) 2001 Momchil Velikov
  4. * Portions Copyright (C) 2001 Christoph Hellwig
  5. * Copyright (C) 2005 SGI, Christoph Lameter
  6. * Copyright (C) 2006 Nick Piggin
  7. * Copyright (C) 2012 Konstantin Khlebnikov
  8. * Copyright (C) 2016 Intel, Matthew Wilcox
  9. * Copyright (C) 2016 Intel, Ross Zwisler
  10. */
  11. #include <linux/bitmap.h>
  12. #include <linux/bitops.h>
  13. #include <linux/bug.h>
  14. #include <linux/cpu.h>
  15. #include <linux/errno.h>
  16. #include <linux/export.h>
  17. #include <linux/idr.h>
  18. #include <linux/init.h>
  19. #include <linux/kernel.h>
  20. #include <linux/kmemleak.h>
  21. #include <linux/percpu.h>
  22. #include <linux/preempt.h> /* in_interrupt() */
  23. #include <linux/radix-tree.h>
  24. #include <linux/rcupdate.h>
  25. #include <linux/slab.h>
  26. #include <linux/string.h>
  27. #include <linux/xarray.h>
  28. #include "radix-tree.h"
  29. /*
  30. * Radix tree node cache.
  31. */
  32. struct kmem_cache *radix_tree_node_cachep;
  33. /*
  34. * The radix tree is variable-height, so an insert operation not only has
  35. * to build the branch to its corresponding item, it also has to build the
  36. * branch to existing items if the size has to be increased (by
  37. * radix_tree_extend).
  38. *
  39. * The worst case is a zero height tree with just a single item at index 0,
  40. * and then inserting an item at index ULONG_MAX. This requires 2 new branches
  41. * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
  42. * Hence:
  43. */
  44. #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
  45. /*
  46. * The IDR does not have to be as high as the radix tree since it uses
  47. * signed integers, not unsigned longs.
  48. */
  49. #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1)
  50. #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \
  51. RADIX_TREE_MAP_SHIFT))
  52. #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1)
  53. /*
  54. * Per-cpu pool of preloaded nodes
  55. */
  56. DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
  57. .lock = INIT_LOCAL_LOCK(lock),
  58. };
  59. EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
  60. static inline struct radix_tree_node *entry_to_node(void *ptr)
  61. {
  62. return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
  63. }
  64. static inline void *node_to_entry(void *ptr)
  65. {
  66. return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
  67. }
  68. #define RADIX_TREE_RETRY XA_RETRY_ENTRY
  69. static inline unsigned long
  70. get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
  71. {
  72. return parent ? slot - parent->slots : 0;
  73. }
  74. static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
  75. struct radix_tree_node **nodep, unsigned long index)
  76. {
  77. unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
  78. void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
  79. *nodep = (void *)entry;
  80. return offset;
  81. }
  82. static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
  83. {
  84. return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
  85. }
  86. static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
  87. int offset)
  88. {
  89. __set_bit(offset, node->tags[tag]);
  90. }
  91. static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
  92. int offset)
  93. {
  94. __clear_bit(offset, node->tags[tag]);
  95. }
  96. static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
  97. int offset)
  98. {
  99. return test_bit(offset, node->tags[tag]);
  100. }
  101. static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
  102. {
  103. root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
  104. }
  105. static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
  106. {
  107. root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
  108. }
  109. static inline void root_tag_clear_all(struct radix_tree_root *root)
  110. {
  111. root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
  112. }
  113. static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
  114. {
  115. return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
  116. }
  117. static inline unsigned root_tags_get(const struct radix_tree_root *root)
  118. {
  119. return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
  120. }
  121. static inline bool is_idr(const struct radix_tree_root *root)
  122. {
  123. return !!(root->xa_flags & ROOT_IS_IDR);
  124. }
  125. /*
  126. * Returns 1 if any slot in the node has this tag set.
  127. * Otherwise returns 0.
  128. */
  129. static inline int any_tag_set(const struct radix_tree_node *node,
  130. unsigned int tag)
  131. {
  132. unsigned idx;
  133. for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
  134. if (node->tags[tag][idx])
  135. return 1;
  136. }
  137. return 0;
  138. }
  139. static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
  140. {
  141. bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
  142. }
  143. /**
  144. * radix_tree_find_next_bit - find the next set bit in a memory region
  145. *
  146. * @node: where to begin the search
  147. * @tag: the tag index
  148. * @offset: the bitnumber to start searching at
  149. *
  150. * Unrollable variant of find_next_bit() for constant size arrays.
  151. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
  152. * Returns next bit offset, or size if nothing found.
  153. */
  154. static __always_inline unsigned long
  155. radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
  156. unsigned long offset)
  157. {
  158. const unsigned long *addr = node->tags[tag];
  159. if (offset < RADIX_TREE_MAP_SIZE) {
  160. unsigned long tmp;
  161. addr += offset / BITS_PER_LONG;
  162. tmp = *addr >> (offset % BITS_PER_LONG);
  163. if (tmp)
  164. return __ffs(tmp) + offset;
  165. offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
  166. while (offset < RADIX_TREE_MAP_SIZE) {
  167. tmp = *++addr;
  168. if (tmp)
  169. return __ffs(tmp) + offset;
  170. offset += BITS_PER_LONG;
  171. }
  172. }
  173. return RADIX_TREE_MAP_SIZE;
  174. }
  175. static unsigned int iter_offset(const struct radix_tree_iter *iter)
  176. {
  177. return iter->index & RADIX_TREE_MAP_MASK;
  178. }
  179. /*
  180. * The maximum index which can be stored in a radix tree
  181. */
  182. static inline unsigned long shift_maxindex(unsigned int shift)
  183. {
  184. return (RADIX_TREE_MAP_SIZE << shift) - 1;
  185. }
  186. static inline unsigned long node_maxindex(const struct radix_tree_node *node)
  187. {
  188. return shift_maxindex(node->shift);
  189. }
  190. static unsigned long next_index(unsigned long index,
  191. const struct radix_tree_node *node,
  192. unsigned long offset)
  193. {
  194. return (index & ~node_maxindex(node)) + (offset << node->shift);
  195. }
  196. /*
  197. * This assumes that the caller has performed appropriate preallocation, and
  198. * that the caller has pinned this thread of control to the current CPU.
  199. */
  200. static struct radix_tree_node *
  201. radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
  202. struct radix_tree_root *root,
  203. unsigned int shift, unsigned int offset,
  204. unsigned int count, unsigned int nr_values)
  205. {
  206. struct radix_tree_node *ret = NULL;
  207. /*
  208. * Preload code isn't irq safe and it doesn't make sense to use
  209. * preloading during an interrupt anyway as all the allocations have
  210. * to be atomic. So just do normal allocation when in interrupt.
  211. */
  212. if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
  213. struct radix_tree_preload *rtp;
  214. /*
  215. * Even if the caller has preloaded, try to allocate from the
  216. * cache first for the new node to get accounted to the memory
  217. * cgroup.
  218. */
  219. ret = kmem_cache_alloc(radix_tree_node_cachep,
  220. gfp_mask | __GFP_NOWARN);
  221. if (ret)
  222. goto out;
  223. /*
  224. * Provided the caller has preloaded here, we will always
  225. * succeed in getting a node here (and never reach
  226. * kmem_cache_alloc)
  227. */
  228. rtp = this_cpu_ptr(&radix_tree_preloads);
  229. if (rtp->nr) {
  230. ret = rtp->nodes;
  231. rtp->nodes = ret->parent;
  232. rtp->nr--;
  233. }
  234. /*
  235. * Update the allocation stack trace as this is more useful
  236. * for debugging.
  237. */
  238. kmemleak_update_trace(ret);
  239. goto out;
  240. }
  241. ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  242. out:
  243. BUG_ON(radix_tree_is_internal_node(ret));
  244. if (ret) {
  245. ret->shift = shift;
  246. ret->offset = offset;
  247. ret->count = count;
  248. ret->nr_values = nr_values;
  249. ret->parent = parent;
  250. ret->array = root;
  251. }
  252. return ret;
  253. }
  254. void radix_tree_node_rcu_free(struct rcu_head *head)
  255. {
  256. struct radix_tree_node *node =
  257. container_of(head, struct radix_tree_node, rcu_head);
  258. /*
  259. * Must only free zeroed nodes into the slab. We can be left with
  260. * non-NULL entries by radix_tree_free_nodes, so clear the entries
  261. * and tags here.
  262. */
  263. memset(node->slots, 0, sizeof(node->slots));
  264. memset(node->tags, 0, sizeof(node->tags));
  265. INIT_LIST_HEAD(&node->private_list);
  266. kmem_cache_free(radix_tree_node_cachep, node);
  267. }
  268. static inline void
  269. radix_tree_node_free(struct radix_tree_node *node)
  270. {
  271. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  272. }
  273. /*
  274. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  275. * ensure that the addition of a single element in the tree cannot fail. On
  276. * success, return zero, with preemption disabled. On error, return -ENOMEM
  277. * with preemption not disabled.
  278. *
  279. * To make use of this facility, the radix tree must be initialised without
  280. * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  281. */
  282. static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
  283. {
  284. struct radix_tree_preload *rtp;
  285. struct radix_tree_node *node;
  286. int ret = -ENOMEM;
  287. /*
  288. * Nodes preloaded by one cgroup can be used by another cgroup, so
  289. * they should never be accounted to any particular memory cgroup.
  290. */
  291. gfp_mask &= ~__GFP_ACCOUNT;
  292. local_lock(&radix_tree_preloads.lock);
  293. rtp = this_cpu_ptr(&radix_tree_preloads);
  294. while (rtp->nr < nr) {
  295. local_unlock(&radix_tree_preloads.lock);
  296. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  297. if (node == NULL)
  298. goto out;
  299. local_lock(&radix_tree_preloads.lock);
  300. rtp = this_cpu_ptr(&radix_tree_preloads);
  301. if (rtp->nr < nr) {
  302. node->parent = rtp->nodes;
  303. rtp->nodes = node;
  304. rtp->nr++;
  305. } else {
  306. kmem_cache_free(radix_tree_node_cachep, node);
  307. }
  308. }
  309. ret = 0;
  310. out:
  311. return ret;
  312. }
  313. /*
  314. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  315. * ensure that the addition of a single element in the tree cannot fail. On
  316. * success, return zero, with preemption disabled. On error, return -ENOMEM
  317. * with preemption not disabled.
  318. *
  319. * To make use of this facility, the radix tree must be initialised without
  320. * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  321. */
  322. int radix_tree_preload(gfp_t gfp_mask)
  323. {
  324. /* Warn on non-sensical use... */
  325. WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
  326. return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
  327. }
  328. EXPORT_SYMBOL(radix_tree_preload);
  329. /*
  330. * The same as above function, except we don't guarantee preloading happens.
  331. * We do it, if we decide it helps. On success, return zero with preemption
  332. * disabled. On error, return -ENOMEM with preemption not disabled.
  333. */
  334. int radix_tree_maybe_preload(gfp_t gfp_mask)
  335. {
  336. if (gfpflags_allow_blocking(gfp_mask))
  337. return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
  338. /* Preloading doesn't help anything with this gfp mask, skip it */
  339. local_lock(&radix_tree_preloads.lock);
  340. return 0;
  341. }
  342. EXPORT_SYMBOL(radix_tree_maybe_preload);
  343. static unsigned radix_tree_load_root(const struct radix_tree_root *root,
  344. struct radix_tree_node **nodep, unsigned long *maxindex)
  345. {
  346. struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
  347. *nodep = node;
  348. if (likely(radix_tree_is_internal_node(node))) {
  349. node = entry_to_node(node);
  350. *maxindex = node_maxindex(node);
  351. return node->shift + RADIX_TREE_MAP_SHIFT;
  352. }
  353. *maxindex = 0;
  354. return 0;
  355. }
  356. /*
  357. * Extend a radix tree so it can store key @index.
  358. */
  359. static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
  360. unsigned long index, unsigned int shift)
  361. {
  362. void *entry;
  363. unsigned int maxshift;
  364. int tag;
  365. /* Figure out what the shift should be. */
  366. maxshift = shift;
  367. while (index > shift_maxindex(maxshift))
  368. maxshift += RADIX_TREE_MAP_SHIFT;
  369. entry = rcu_dereference_raw(root->xa_head);
  370. if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
  371. goto out;
  372. do {
  373. struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
  374. root, shift, 0, 1, 0);
  375. if (!node)
  376. return -ENOMEM;
  377. if (is_idr(root)) {
  378. all_tag_set(node, IDR_FREE);
  379. if (!root_tag_get(root, IDR_FREE)) {
  380. tag_clear(node, IDR_FREE, 0);
  381. root_tag_set(root, IDR_FREE);
  382. }
  383. } else {
  384. /* Propagate the aggregated tag info to the new child */
  385. for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
  386. if (root_tag_get(root, tag))
  387. tag_set(node, tag, 0);
  388. }
  389. }
  390. BUG_ON(shift > BITS_PER_LONG);
  391. if (radix_tree_is_internal_node(entry)) {
  392. entry_to_node(entry)->parent = node;
  393. } else if (xa_is_value(entry)) {
  394. /* Moving a value entry root->xa_head to a node */
  395. node->nr_values = 1;
  396. }
  397. /*
  398. * entry was already in the radix tree, so we do not need
  399. * rcu_assign_pointer here
  400. */
  401. node->slots[0] = (void __rcu *)entry;
  402. entry = node_to_entry(node);
  403. rcu_assign_pointer(root->xa_head, entry);
  404. shift += RADIX_TREE_MAP_SHIFT;
  405. } while (shift <= maxshift);
  406. out:
  407. return maxshift + RADIX_TREE_MAP_SHIFT;
  408. }
  409. /**
  410. * radix_tree_shrink - shrink radix tree to minimum height
  411. * @root: radix tree root
  412. */
  413. static inline bool radix_tree_shrink(struct radix_tree_root *root)
  414. {
  415. bool shrunk = false;
  416. for (;;) {
  417. struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
  418. struct radix_tree_node *child;
  419. if (!radix_tree_is_internal_node(node))
  420. break;
  421. node = entry_to_node(node);
  422. /*
  423. * The candidate node has more than one child, or its child
  424. * is not at the leftmost slot, we cannot shrink.
  425. */
  426. if (node->count != 1)
  427. break;
  428. child = rcu_dereference_raw(node->slots[0]);
  429. if (!child)
  430. break;
  431. /*
  432. * For an IDR, we must not shrink entry 0 into the root in
  433. * case somebody calls idr_replace() with a pointer that
  434. * appears to be an internal entry
  435. */
  436. if (!node->shift && is_idr(root))
  437. break;
  438. if (radix_tree_is_internal_node(child))
  439. entry_to_node(child)->parent = NULL;
  440. /*
  441. * We don't need rcu_assign_pointer(), since we are simply
  442. * moving the node from one part of the tree to another: if it
  443. * was safe to dereference the old pointer to it
  444. * (node->slots[0]), it will be safe to dereference the new
  445. * one (root->xa_head) as far as dependent read barriers go.
  446. */
  447. root->xa_head = (void __rcu *)child;
  448. if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
  449. root_tag_clear(root, IDR_FREE);
  450. /*
  451. * We have a dilemma here. The node's slot[0] must not be
  452. * NULLed in case there are concurrent lookups expecting to
  453. * find the item. However if this was a bottom-level node,
  454. * then it may be subject to the slot pointer being visible
  455. * to callers dereferencing it. If item corresponding to
  456. * slot[0] is subsequently deleted, these callers would expect
  457. * their slot to become empty sooner or later.
  458. *
  459. * For example, lockless pagecache will look up a slot, deref
  460. * the page pointer, and if the page has 0 refcount it means it
  461. * was concurrently deleted from pagecache so try the deref
  462. * again. Fortunately there is already a requirement for logic
  463. * to retry the entire slot lookup -- the indirect pointer
  464. * problem (replacing direct root node with an indirect pointer
  465. * also results in a stale slot). So tag the slot as indirect
  466. * to force callers to retry.
  467. */
  468. node->count = 0;
  469. if (!radix_tree_is_internal_node(child)) {
  470. node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
  471. }
  472. WARN_ON_ONCE(!list_empty(&node->private_list));
  473. radix_tree_node_free(node);
  474. shrunk = true;
  475. }
  476. return shrunk;
  477. }
  478. static bool delete_node(struct radix_tree_root *root,
  479. struct radix_tree_node *node)
  480. {
  481. bool deleted = false;
  482. do {
  483. struct radix_tree_node *parent;
  484. if (node->count) {
  485. if (node_to_entry(node) ==
  486. rcu_dereference_raw(root->xa_head))
  487. deleted |= radix_tree_shrink(root);
  488. return deleted;
  489. }
  490. parent = node->parent;
  491. if (parent) {
  492. parent->slots[node->offset] = NULL;
  493. parent->count--;
  494. } else {
  495. /*
  496. * Shouldn't the tags already have all been cleared
  497. * by the caller?
  498. */
  499. if (!is_idr(root))
  500. root_tag_clear_all(root);
  501. root->xa_head = NULL;
  502. }
  503. WARN_ON_ONCE(!list_empty(&node->private_list));
  504. radix_tree_node_free(node);
  505. deleted = true;
  506. node = parent;
  507. } while (node);
  508. return deleted;
  509. }
  510. /**
  511. * __radix_tree_create - create a slot in a radix tree
  512. * @root: radix tree root
  513. * @index: index key
  514. * @nodep: returns node
  515. * @slotp: returns slot
  516. *
  517. * Create, if necessary, and return the node and slot for an item
  518. * at position @index in the radix tree @root.
  519. *
  520. * Until there is more than one item in the tree, no nodes are
  521. * allocated and @root->xa_head is used as a direct slot instead of
  522. * pointing to a node, in which case *@nodep will be NULL.
  523. *
  524. * Returns -ENOMEM, or 0 for success.
  525. */
  526. static int __radix_tree_create(struct radix_tree_root *root,
  527. unsigned long index, struct radix_tree_node **nodep,
  528. void __rcu ***slotp)
  529. {
  530. struct radix_tree_node *node = NULL, *child;
  531. void __rcu **slot = (void __rcu **)&root->xa_head;
  532. unsigned long maxindex;
  533. unsigned int shift, offset = 0;
  534. unsigned long max = index;
  535. gfp_t gfp = root_gfp_mask(root);
  536. shift = radix_tree_load_root(root, &child, &maxindex);
  537. /* Make sure the tree is high enough. */
  538. if (max > maxindex) {
  539. int error = radix_tree_extend(root, gfp, max, shift);
  540. if (error < 0)
  541. return error;
  542. shift = error;
  543. child = rcu_dereference_raw(root->xa_head);
  544. }
  545. while (shift > 0) {
  546. shift -= RADIX_TREE_MAP_SHIFT;
  547. if (child == NULL) {
  548. /* Have to add a child node. */
  549. child = radix_tree_node_alloc(gfp, node, root, shift,
  550. offset, 0, 0);
  551. if (!child)
  552. return -ENOMEM;
  553. rcu_assign_pointer(*slot, node_to_entry(child));
  554. if (node)
  555. node->count++;
  556. } else if (!radix_tree_is_internal_node(child))
  557. break;
  558. /* Go a level down */
  559. node = entry_to_node(child);
  560. offset = radix_tree_descend(node, &child, index);
  561. slot = &node->slots[offset];
  562. }
  563. if (nodep)
  564. *nodep = node;
  565. if (slotp)
  566. *slotp = slot;
  567. return 0;
  568. }
  569. /*
  570. * Free any nodes below this node. The tree is presumed to not need
  571. * shrinking, and any user data in the tree is presumed to not need a
  572. * destructor called on it. If we need to add a destructor, we can
  573. * add that functionality later. Note that we may not clear tags or
  574. * slots from the tree as an RCU walker may still have a pointer into
  575. * this subtree. We could replace the entries with RADIX_TREE_RETRY,
  576. * but we'll still have to clear those in rcu_free.
  577. */
  578. static void radix_tree_free_nodes(struct radix_tree_node *node)
  579. {
  580. unsigned offset = 0;
  581. struct radix_tree_node *child = entry_to_node(node);
  582. for (;;) {
  583. void *entry = rcu_dereference_raw(child->slots[offset]);
  584. if (xa_is_node(entry) && child->shift) {
  585. child = entry_to_node(entry);
  586. offset = 0;
  587. continue;
  588. }
  589. offset++;
  590. while (offset == RADIX_TREE_MAP_SIZE) {
  591. struct radix_tree_node *old = child;
  592. offset = child->offset + 1;
  593. child = child->parent;
  594. WARN_ON_ONCE(!list_empty(&old->private_list));
  595. radix_tree_node_free(old);
  596. if (old == entry_to_node(node))
  597. return;
  598. }
  599. }
  600. }
  601. static inline int insert_entries(struct radix_tree_node *node,
  602. void __rcu **slot, void *item)
  603. {
  604. if (*slot)
  605. return -EEXIST;
  606. rcu_assign_pointer(*slot, item);
  607. if (node) {
  608. node->count++;
  609. if (xa_is_value(item))
  610. node->nr_values++;
  611. }
  612. return 1;
  613. }
  614. /**
  615. * radix_tree_insert - insert into a radix tree
  616. * @root: radix tree root
  617. * @index: index key
  618. * @item: item to insert
  619. *
  620. * Insert an item into the radix tree at position @index.
  621. */
  622. int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
  623. void *item)
  624. {
  625. struct radix_tree_node *node;
  626. void __rcu **slot;
  627. int error;
  628. BUG_ON(radix_tree_is_internal_node(item));
  629. error = __radix_tree_create(root, index, &node, &slot);
  630. if (error)
  631. return error;
  632. error = insert_entries(node, slot, item);
  633. if (error < 0)
  634. return error;
  635. if (node) {
  636. unsigned offset = get_slot_offset(node, slot);
  637. BUG_ON(tag_get(node, 0, offset));
  638. BUG_ON(tag_get(node, 1, offset));
  639. BUG_ON(tag_get(node, 2, offset));
  640. } else {
  641. BUG_ON(root_tags_get(root));
  642. }
  643. return 0;
  644. }
  645. EXPORT_SYMBOL(radix_tree_insert);
  646. /**
  647. * __radix_tree_lookup - lookup an item in a radix tree
  648. * @root: radix tree root
  649. * @index: index key
  650. * @nodep: returns node
  651. * @slotp: returns slot
  652. *
  653. * Lookup and return the item at position @index in the radix
  654. * tree @root.
  655. *
  656. * Until there is more than one item in the tree, no nodes are
  657. * allocated and @root->xa_head is used as a direct slot instead of
  658. * pointing to a node, in which case *@nodep will be NULL.
  659. */
  660. void *__radix_tree_lookup(const struct radix_tree_root *root,
  661. unsigned long index, struct radix_tree_node **nodep,
  662. void __rcu ***slotp)
  663. {
  664. struct radix_tree_node *node, *parent;
  665. unsigned long maxindex;
  666. void __rcu **slot;
  667. restart:
  668. parent = NULL;
  669. slot = (void __rcu **)&root->xa_head;
  670. radix_tree_load_root(root, &node, &maxindex);
  671. if (index > maxindex)
  672. return NULL;
  673. while (radix_tree_is_internal_node(node)) {
  674. unsigned offset;
  675. parent = entry_to_node(node);
  676. offset = radix_tree_descend(parent, &node, index);
  677. slot = parent->slots + offset;
  678. if (node == RADIX_TREE_RETRY)
  679. goto restart;
  680. if (parent->shift == 0)
  681. break;
  682. }
  683. if (nodep)
  684. *nodep = parent;
  685. if (slotp)
  686. *slotp = slot;
  687. return node;
  688. }
  689. /**
  690. * radix_tree_lookup_slot - lookup a slot in a radix tree
  691. * @root: radix tree root
  692. * @index: index key
  693. *
  694. * Returns: the slot corresponding to the position @index in the
  695. * radix tree @root. This is useful for update-if-exists operations.
  696. *
  697. * This function can be called under rcu_read_lock iff the slot is not
  698. * modified by radix_tree_replace_slot, otherwise it must be called
  699. * exclusive from other writers. Any dereference of the slot must be done
  700. * using radix_tree_deref_slot.
  701. */
  702. void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
  703. unsigned long index)
  704. {
  705. void __rcu **slot;
  706. if (!__radix_tree_lookup(root, index, NULL, &slot))
  707. return NULL;
  708. return slot;
  709. }
  710. EXPORT_SYMBOL(radix_tree_lookup_slot);
  711. /**
  712. * radix_tree_lookup - perform lookup operation on a radix tree
  713. * @root: radix tree root
  714. * @index: index key
  715. *
  716. * Lookup the item at the position @index in the radix tree @root.
  717. *
  718. * This function can be called under rcu_read_lock, however the caller
  719. * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
  720. * them safely). No RCU barriers are required to access or modify the
  721. * returned item, however.
  722. */
  723. void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
  724. {
  725. return __radix_tree_lookup(root, index, NULL, NULL);
  726. }
  727. EXPORT_SYMBOL(radix_tree_lookup);
  728. static void replace_slot(void __rcu **slot, void *item,
  729. struct radix_tree_node *node, int count, int values)
  730. {
  731. if (node && (count || values)) {
  732. node->count += count;
  733. node->nr_values += values;
  734. }
  735. rcu_assign_pointer(*slot, item);
  736. }
  737. static bool node_tag_get(const struct radix_tree_root *root,
  738. const struct radix_tree_node *node,
  739. unsigned int tag, unsigned int offset)
  740. {
  741. if (node)
  742. return tag_get(node, tag, offset);
  743. return root_tag_get(root, tag);
  744. }
  745. /*
  746. * IDR users want to be able to store NULL in the tree, so if the slot isn't
  747. * free, don't adjust the count, even if it's transitioning between NULL and
  748. * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still
  749. * have empty bits, but it only stores NULL in slots when they're being
  750. * deleted.
  751. */
  752. static int calculate_count(struct radix_tree_root *root,
  753. struct radix_tree_node *node, void __rcu **slot,
  754. void *item, void *old)
  755. {
  756. if (is_idr(root)) {
  757. unsigned offset = get_slot_offset(node, slot);
  758. bool free = node_tag_get(root, node, IDR_FREE, offset);
  759. if (!free)
  760. return 0;
  761. if (!old)
  762. return 1;
  763. }
  764. return !!item - !!old;
  765. }
  766. /**
  767. * __radix_tree_replace - replace item in a slot
  768. * @root: radix tree root
  769. * @node: pointer to tree node
  770. * @slot: pointer to slot in @node
  771. * @item: new item to store in the slot.
  772. *
  773. * For use with __radix_tree_lookup(). Caller must hold tree write locked
  774. * across slot lookup and replacement.
  775. */
  776. void __radix_tree_replace(struct radix_tree_root *root,
  777. struct radix_tree_node *node,
  778. void __rcu **slot, void *item)
  779. {
  780. void *old = rcu_dereference_raw(*slot);
  781. int values = !!xa_is_value(item) - !!xa_is_value(old);
  782. int count = calculate_count(root, node, slot, item, old);
  783. /*
  784. * This function supports replacing value entries and
  785. * deleting entries, but that needs accounting against the
  786. * node unless the slot is root->xa_head.
  787. */
  788. WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
  789. (count || values));
  790. replace_slot(slot, item, node, count, values);
  791. if (!node)
  792. return;
  793. delete_node(root, node);
  794. }
  795. /**
  796. * radix_tree_replace_slot - replace item in a slot
  797. * @root: radix tree root
  798. * @slot: pointer to slot
  799. * @item: new item to store in the slot.
  800. *
  801. * For use with radix_tree_lookup_slot() and
  802. * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
  803. * across slot lookup and replacement.
  804. *
  805. * NOTE: This cannot be used to switch between non-entries (empty slots),
  806. * regular entries, and value entries, as that requires accounting
  807. * inside the radix tree node. When switching from one type of entry or
  808. * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
  809. * radix_tree_iter_replace().
  810. */
  811. void radix_tree_replace_slot(struct radix_tree_root *root,
  812. void __rcu **slot, void *item)
  813. {
  814. __radix_tree_replace(root, NULL, slot, item);
  815. }
  816. EXPORT_SYMBOL(radix_tree_replace_slot);
  817. /**
  818. * radix_tree_iter_replace - replace item in a slot
  819. * @root: radix tree root
  820. * @iter: iterator state
  821. * @slot: pointer to slot
  822. * @item: new item to store in the slot.
  823. *
  824. * For use with radix_tree_for_each_slot().
  825. * Caller must hold tree write locked.
  826. */
  827. void radix_tree_iter_replace(struct radix_tree_root *root,
  828. const struct radix_tree_iter *iter,
  829. void __rcu **slot, void *item)
  830. {
  831. __radix_tree_replace(root, iter->node, slot, item);
  832. }
  833. static void node_tag_set(struct radix_tree_root *root,
  834. struct radix_tree_node *node,
  835. unsigned int tag, unsigned int offset)
  836. {
  837. while (node) {
  838. if (tag_get(node, tag, offset))
  839. return;
  840. tag_set(node, tag, offset);
  841. offset = node->offset;
  842. node = node->parent;
  843. }
  844. if (!root_tag_get(root, tag))
  845. root_tag_set(root, tag);
  846. }
  847. /**
  848. * radix_tree_tag_set - set a tag on a radix tree node
  849. * @root: radix tree root
  850. * @index: index key
  851. * @tag: tag index
  852. *
  853. * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
  854. * corresponding to @index in the radix tree. From
  855. * the root all the way down to the leaf node.
  856. *
  857. * Returns the address of the tagged item. Setting a tag on a not-present
  858. * item is a bug.
  859. */
  860. void *radix_tree_tag_set(struct radix_tree_root *root,
  861. unsigned long index, unsigned int tag)
  862. {
  863. struct radix_tree_node *node, *parent;
  864. unsigned long maxindex;
  865. radix_tree_load_root(root, &node, &maxindex);
  866. BUG_ON(index > maxindex);
  867. while (radix_tree_is_internal_node(node)) {
  868. unsigned offset;
  869. parent = entry_to_node(node);
  870. offset = radix_tree_descend(parent, &node, index);
  871. BUG_ON(!node);
  872. if (!tag_get(parent, tag, offset))
  873. tag_set(parent, tag, offset);
  874. }
  875. /* set the root's tag bit */
  876. if (!root_tag_get(root, tag))
  877. root_tag_set(root, tag);
  878. return node;
  879. }
  880. EXPORT_SYMBOL(radix_tree_tag_set);
  881. static void node_tag_clear(struct radix_tree_root *root,
  882. struct radix_tree_node *node,
  883. unsigned int tag, unsigned int offset)
  884. {
  885. while (node) {
  886. if (!tag_get(node, tag, offset))
  887. return;
  888. tag_clear(node, tag, offset);
  889. if (any_tag_set(node, tag))
  890. return;
  891. offset = node->offset;
  892. node = node->parent;
  893. }
  894. /* clear the root's tag bit */
  895. if (root_tag_get(root, tag))
  896. root_tag_clear(root, tag);
  897. }
  898. /**
  899. * radix_tree_tag_clear - clear a tag on a radix tree node
  900. * @root: radix tree root
  901. * @index: index key
  902. * @tag: tag index
  903. *
  904. * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
  905. * corresponding to @index in the radix tree. If this causes
  906. * the leaf node to have no tags set then clear the tag in the
  907. * next-to-leaf node, etc.
  908. *
  909. * Returns the address of the tagged item on success, else NULL. ie:
  910. * has the same return value and semantics as radix_tree_lookup().
  911. */
  912. void *radix_tree_tag_clear(struct radix_tree_root *root,
  913. unsigned long index, unsigned int tag)
  914. {
  915. struct radix_tree_node *node, *parent;
  916. unsigned long maxindex;
  917. int offset = 0;
  918. radix_tree_load_root(root, &node, &maxindex);
  919. if (index > maxindex)
  920. return NULL;
  921. parent = NULL;
  922. while (radix_tree_is_internal_node(node)) {
  923. parent = entry_to_node(node);
  924. offset = radix_tree_descend(parent, &node, index);
  925. }
  926. if (node)
  927. node_tag_clear(root, parent, tag, offset);
  928. return node;
  929. }
  930. EXPORT_SYMBOL(radix_tree_tag_clear);
  931. /**
  932. * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
  933. * @root: radix tree root
  934. * @iter: iterator state
  935. * @tag: tag to clear
  936. */
  937. void radix_tree_iter_tag_clear(struct radix_tree_root *root,
  938. const struct radix_tree_iter *iter, unsigned int tag)
  939. {
  940. node_tag_clear(root, iter->node, tag, iter_offset(iter));
  941. }
  942. /**
  943. * radix_tree_tag_get - get a tag on a radix tree node
  944. * @root: radix tree root
  945. * @index: index key
  946. * @tag: tag index (< RADIX_TREE_MAX_TAGS)
  947. *
  948. * Return values:
  949. *
  950. * 0: tag not present or not set
  951. * 1: tag set
  952. *
  953. * Note that the return value of this function may not be relied on, even if
  954. * the RCU lock is held, unless tag modification and node deletion are excluded
  955. * from concurrency.
  956. */
  957. int radix_tree_tag_get(const struct radix_tree_root *root,
  958. unsigned long index, unsigned int tag)
  959. {
  960. struct radix_tree_node *node, *parent;
  961. unsigned long maxindex;
  962. if (!root_tag_get(root, tag))
  963. return 0;
  964. radix_tree_load_root(root, &node, &maxindex);
  965. if (index > maxindex)
  966. return 0;
  967. while (radix_tree_is_internal_node(node)) {
  968. unsigned offset;
  969. parent = entry_to_node(node);
  970. offset = radix_tree_descend(parent, &node, index);
  971. if (!tag_get(parent, tag, offset))
  972. return 0;
  973. if (node == RADIX_TREE_RETRY)
  974. break;
  975. }
  976. return 1;
  977. }
  978. EXPORT_SYMBOL(radix_tree_tag_get);
  979. /* Construct iter->tags bit-mask from node->tags[tag] array */
  980. static void set_iter_tags(struct radix_tree_iter *iter,
  981. struct radix_tree_node *node, unsigned offset,
  982. unsigned tag)
  983. {
  984. unsigned tag_long = offset / BITS_PER_LONG;
  985. unsigned tag_bit = offset % BITS_PER_LONG;
  986. if (!node) {
  987. iter->tags = 1;
  988. return;
  989. }
  990. iter->tags = node->tags[tag][tag_long] >> tag_bit;
  991. /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
  992. if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
  993. /* Pick tags from next element */
  994. if (tag_bit)
  995. iter->tags |= node->tags[tag][tag_long + 1] <<
  996. (BITS_PER_LONG - tag_bit);
  997. /* Clip chunk size, here only BITS_PER_LONG tags */
  998. iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
  999. }
  1000. }
  1001. void __rcu **radix_tree_iter_resume(void __rcu **slot,
  1002. struct radix_tree_iter *iter)
  1003. {
  1004. iter->index = __radix_tree_iter_add(iter, 1);
  1005. iter->next_index = iter->index;
  1006. iter->tags = 0;
  1007. return NULL;
  1008. }
  1009. EXPORT_SYMBOL(radix_tree_iter_resume);
  1010. /**
  1011. * radix_tree_next_chunk - find next chunk of slots for iteration
  1012. *
  1013. * @root: radix tree root
  1014. * @iter: iterator state
  1015. * @flags: RADIX_TREE_ITER_* flags and tag index
  1016. * Returns: pointer to chunk first slot, or NULL if iteration is over
  1017. */
  1018. void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
  1019. struct radix_tree_iter *iter, unsigned flags)
  1020. {
  1021. unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
  1022. struct radix_tree_node *node, *child;
  1023. unsigned long index, offset, maxindex;
  1024. if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
  1025. return NULL;
  1026. /*
  1027. * Catch next_index overflow after ~0UL. iter->index never overflows
  1028. * during iterating; it can be zero only at the beginning.
  1029. * And we cannot overflow iter->next_index in a single step,
  1030. * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
  1031. *
  1032. * This condition also used by radix_tree_next_slot() to stop
  1033. * contiguous iterating, and forbid switching to the next chunk.
  1034. */
  1035. index = iter->next_index;
  1036. if (!index && iter->index)
  1037. return NULL;
  1038. restart:
  1039. radix_tree_load_root(root, &child, &maxindex);
  1040. if (index > maxindex)
  1041. return NULL;
  1042. if (!child)
  1043. return NULL;
  1044. if (!radix_tree_is_internal_node(child)) {
  1045. /* Single-slot tree */
  1046. iter->index = index;
  1047. iter->next_index = maxindex + 1;
  1048. iter->tags = 1;
  1049. iter->node = NULL;
  1050. return (void __rcu **)&root->xa_head;
  1051. }
  1052. do {
  1053. node = entry_to_node(child);
  1054. offset = radix_tree_descend(node, &child, index);
  1055. if ((flags & RADIX_TREE_ITER_TAGGED) ?
  1056. !tag_get(node, tag, offset) : !child) {
  1057. /* Hole detected */
  1058. if (flags & RADIX_TREE_ITER_CONTIG)
  1059. return NULL;
  1060. if (flags & RADIX_TREE_ITER_TAGGED)
  1061. offset = radix_tree_find_next_bit(node, tag,
  1062. offset + 1);
  1063. else
  1064. while (++offset < RADIX_TREE_MAP_SIZE) {
  1065. void *slot = rcu_dereference_raw(
  1066. node->slots[offset]);
  1067. if (slot)
  1068. break;
  1069. }
  1070. index &= ~node_maxindex(node);
  1071. index += offset << node->shift;
  1072. /* Overflow after ~0UL */
  1073. if (!index)
  1074. return NULL;
  1075. if (offset == RADIX_TREE_MAP_SIZE)
  1076. goto restart;
  1077. child = rcu_dereference_raw(node->slots[offset]);
  1078. }
  1079. if (!child)
  1080. goto restart;
  1081. if (child == RADIX_TREE_RETRY)
  1082. break;
  1083. } while (node->shift && radix_tree_is_internal_node(child));
  1084. /* Update the iterator state */
  1085. iter->index = (index &~ node_maxindex(node)) | offset;
  1086. iter->next_index = (index | node_maxindex(node)) + 1;
  1087. iter->node = node;
  1088. if (flags & RADIX_TREE_ITER_TAGGED)
  1089. set_iter_tags(iter, node, offset, tag);
  1090. return node->slots + offset;
  1091. }
  1092. EXPORT_SYMBOL(radix_tree_next_chunk);
  1093. /**
  1094. * radix_tree_gang_lookup - perform multiple lookup on a radix tree
  1095. * @root: radix tree root
  1096. * @results: where the results of the lookup are placed
  1097. * @first_index: start the lookup from this key
  1098. * @max_items: place up to this many items at *results
  1099. *
  1100. * Performs an index-ascending scan of the tree for present items. Places
  1101. * them at *@results and returns the number of items which were placed at
  1102. * *@results.
  1103. *
  1104. * The implementation is naive.
  1105. *
  1106. * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
  1107. * rcu_read_lock. In this case, rather than the returned results being
  1108. * an atomic snapshot of the tree at a single point in time, the
  1109. * semantics of an RCU protected gang lookup are as though multiple
  1110. * radix_tree_lookups have been issued in individual locks, and results
  1111. * stored in 'results'.
  1112. */
  1113. unsigned int
  1114. radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
  1115. unsigned long first_index, unsigned int max_items)
  1116. {
  1117. struct radix_tree_iter iter;
  1118. void __rcu **slot;
  1119. unsigned int ret = 0;
  1120. if (unlikely(!max_items))
  1121. return 0;
  1122. radix_tree_for_each_slot(slot, root, &iter, first_index) {
  1123. results[ret] = rcu_dereference_raw(*slot);
  1124. if (!results[ret])
  1125. continue;
  1126. if (radix_tree_is_internal_node(results[ret])) {
  1127. slot = radix_tree_iter_retry(&iter);
  1128. continue;
  1129. }
  1130. if (++ret == max_items)
  1131. break;
  1132. }
  1133. return ret;
  1134. }
  1135. EXPORT_SYMBOL(radix_tree_gang_lookup);
  1136. /**
  1137. * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
  1138. * based on a tag
  1139. * @root: radix tree root
  1140. * @results: where the results of the lookup are placed
  1141. * @first_index: start the lookup from this key
  1142. * @max_items: place up to this many items at *results
  1143. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  1144. *
  1145. * Performs an index-ascending scan of the tree for present items which
  1146. * have the tag indexed by @tag set. Places the items at *@results and
  1147. * returns the number of items which were placed at *@results.
  1148. */
  1149. unsigned int
  1150. radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
  1151. unsigned long first_index, unsigned int max_items,
  1152. unsigned int tag)
  1153. {
  1154. struct radix_tree_iter iter;
  1155. void __rcu **slot;
  1156. unsigned int ret = 0;
  1157. if (unlikely(!max_items))
  1158. return 0;
  1159. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1160. results[ret] = rcu_dereference_raw(*slot);
  1161. if (!results[ret])
  1162. continue;
  1163. if (radix_tree_is_internal_node(results[ret])) {
  1164. slot = radix_tree_iter_retry(&iter);
  1165. continue;
  1166. }
  1167. if (++ret == max_items)
  1168. break;
  1169. }
  1170. return ret;
  1171. }
  1172. EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
  1173. /**
  1174. * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
  1175. * radix tree based on a tag
  1176. * @root: radix tree root
  1177. * @results: where the results of the lookup are placed
  1178. * @first_index: start the lookup from this key
  1179. * @max_items: place up to this many items at *results
  1180. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  1181. *
  1182. * Performs an index-ascending scan of the tree for present items which
  1183. * have the tag indexed by @tag set. Places the slots at *@results and
  1184. * returns the number of slots which were placed at *@results.
  1185. */
  1186. unsigned int
  1187. radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
  1188. void __rcu ***results, unsigned long first_index,
  1189. unsigned int max_items, unsigned int tag)
  1190. {
  1191. struct radix_tree_iter iter;
  1192. void __rcu **slot;
  1193. unsigned int ret = 0;
  1194. if (unlikely(!max_items))
  1195. return 0;
  1196. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1197. results[ret] = slot;
  1198. if (++ret == max_items)
  1199. break;
  1200. }
  1201. return ret;
  1202. }
  1203. EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  1204. static bool __radix_tree_delete(struct radix_tree_root *root,
  1205. struct radix_tree_node *node, void __rcu **slot)
  1206. {
  1207. void *old = rcu_dereference_raw(*slot);
  1208. int values = xa_is_value(old) ? -1 : 0;
  1209. unsigned offset = get_slot_offset(node, slot);
  1210. int tag;
  1211. if (is_idr(root))
  1212. node_tag_set(root, node, IDR_FREE, offset);
  1213. else
  1214. for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
  1215. node_tag_clear(root, node, tag, offset);
  1216. replace_slot(slot, NULL, node, -1, values);
  1217. return node && delete_node(root, node);
  1218. }
  1219. /**
  1220. * radix_tree_iter_delete - delete the entry at this iterator position
  1221. * @root: radix tree root
  1222. * @iter: iterator state
  1223. * @slot: pointer to slot
  1224. *
  1225. * Delete the entry at the position currently pointed to by the iterator.
  1226. * This may result in the current node being freed; if it is, the iterator
  1227. * is advanced so that it will not reference the freed memory. This
  1228. * function may be called without any locking if there are no other threads
  1229. * which can access this tree.
  1230. */
  1231. void radix_tree_iter_delete(struct radix_tree_root *root,
  1232. struct radix_tree_iter *iter, void __rcu **slot)
  1233. {
  1234. if (__radix_tree_delete(root, iter->node, slot))
  1235. iter->index = iter->next_index;
  1236. }
  1237. EXPORT_SYMBOL(radix_tree_iter_delete);
  1238. /**
  1239. * radix_tree_delete_item - delete an item from a radix tree
  1240. * @root: radix tree root
  1241. * @index: index key
  1242. * @item: expected item
  1243. *
  1244. * Remove @item at @index from the radix tree rooted at @root.
  1245. *
  1246. * Return: the deleted entry, or %NULL if it was not present
  1247. * or the entry at the given @index was not @item.
  1248. */
  1249. void *radix_tree_delete_item(struct radix_tree_root *root,
  1250. unsigned long index, void *item)
  1251. {
  1252. struct radix_tree_node *node = NULL;
  1253. void __rcu **slot = NULL;
  1254. void *entry;
  1255. entry = __radix_tree_lookup(root, index, &node, &slot);
  1256. if (!slot)
  1257. return NULL;
  1258. if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
  1259. get_slot_offset(node, slot))))
  1260. return NULL;
  1261. if (item && entry != item)
  1262. return NULL;
  1263. __radix_tree_delete(root, node, slot);
  1264. return entry;
  1265. }
  1266. EXPORT_SYMBOL(radix_tree_delete_item);
  1267. /**
  1268. * radix_tree_delete - delete an entry from a radix tree
  1269. * @root: radix tree root
  1270. * @index: index key
  1271. *
  1272. * Remove the entry at @index from the radix tree rooted at @root.
  1273. *
  1274. * Return: The deleted entry, or %NULL if it was not present.
  1275. */
  1276. void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
  1277. {
  1278. return radix_tree_delete_item(root, index, NULL);
  1279. }
  1280. EXPORT_SYMBOL(radix_tree_delete);
  1281. /**
  1282. * radix_tree_tagged - test whether any items in the tree are tagged
  1283. * @root: radix tree root
  1284. * @tag: tag to test
  1285. */
  1286. int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
  1287. {
  1288. return root_tag_get(root, tag);
  1289. }
  1290. EXPORT_SYMBOL(radix_tree_tagged);
  1291. /**
  1292. * idr_preload - preload for idr_alloc()
  1293. * @gfp_mask: allocation mask to use for preloading
  1294. *
  1295. * Preallocate memory to use for the next call to idr_alloc(). This function
  1296. * returns with preemption disabled. It will be enabled by idr_preload_end().
  1297. */
  1298. void idr_preload(gfp_t gfp_mask)
  1299. {
  1300. if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
  1301. local_lock(&radix_tree_preloads.lock);
  1302. }
  1303. EXPORT_SYMBOL(idr_preload);
  1304. void __rcu **idr_get_free(struct radix_tree_root *root,
  1305. struct radix_tree_iter *iter, gfp_t gfp,
  1306. unsigned long max)
  1307. {
  1308. struct radix_tree_node *node = NULL, *child;
  1309. void __rcu **slot = (void __rcu **)&root->xa_head;
  1310. unsigned long maxindex, start = iter->next_index;
  1311. unsigned int shift, offset = 0;
  1312. grow:
  1313. shift = radix_tree_load_root(root, &child, &maxindex);
  1314. if (!radix_tree_tagged(root, IDR_FREE))
  1315. start = max(start, maxindex + 1);
  1316. if (start > max)
  1317. return ERR_PTR(-ENOSPC);
  1318. if (start > maxindex) {
  1319. int error = radix_tree_extend(root, gfp, start, shift);
  1320. if (error < 0)
  1321. return ERR_PTR(error);
  1322. shift = error;
  1323. child = rcu_dereference_raw(root->xa_head);
  1324. }
  1325. if (start == 0 && shift == 0)
  1326. shift = RADIX_TREE_MAP_SHIFT;
  1327. while (shift) {
  1328. shift -= RADIX_TREE_MAP_SHIFT;
  1329. if (child == NULL) {
  1330. /* Have to add a child node. */
  1331. child = radix_tree_node_alloc(gfp, node, root, shift,
  1332. offset, 0, 0);
  1333. if (!child)
  1334. return ERR_PTR(-ENOMEM);
  1335. all_tag_set(child, IDR_FREE);
  1336. rcu_assign_pointer(*slot, node_to_entry(child));
  1337. if (node)
  1338. node->count++;
  1339. } else if (!radix_tree_is_internal_node(child))
  1340. break;
  1341. node = entry_to_node(child);
  1342. offset = radix_tree_descend(node, &child, start);
  1343. if (!tag_get(node, IDR_FREE, offset)) {
  1344. offset = radix_tree_find_next_bit(node, IDR_FREE,
  1345. offset + 1);
  1346. start = next_index(start, node, offset);
  1347. if (start > max || start == 0)
  1348. return ERR_PTR(-ENOSPC);
  1349. while (offset == RADIX_TREE_MAP_SIZE) {
  1350. offset = node->offset + 1;
  1351. node = node->parent;
  1352. if (!node)
  1353. goto grow;
  1354. shift = node->shift;
  1355. }
  1356. child = rcu_dereference_raw(node->slots[offset]);
  1357. }
  1358. slot = &node->slots[offset];
  1359. }
  1360. iter->index = start;
  1361. if (node)
  1362. iter->next_index = 1 + min(max, (start | node_maxindex(node)));
  1363. else
  1364. iter->next_index = 1;
  1365. iter->node = node;
  1366. set_iter_tags(iter, node, offset, IDR_FREE);
  1367. return slot;
  1368. }
  1369. /**
  1370. * idr_destroy - release all internal memory from an IDR
  1371. * @idr: idr handle
  1372. *
  1373. * After this function is called, the IDR is empty, and may be reused or
  1374. * the data structure containing it may be freed.
  1375. *
  1376. * A typical clean-up sequence for objects stored in an idr tree will use
  1377. * idr_for_each() to free all objects, if necessary, then idr_destroy() to
  1378. * free the memory used to keep track of those objects.
  1379. */
  1380. void idr_destroy(struct idr *idr)
  1381. {
  1382. struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
  1383. if (radix_tree_is_internal_node(node))
  1384. radix_tree_free_nodes(node);
  1385. idr->idr_rt.xa_head = NULL;
  1386. root_tag_set(&idr->idr_rt, IDR_FREE);
  1387. }
  1388. EXPORT_SYMBOL(idr_destroy);
  1389. static void
  1390. radix_tree_node_ctor(void *arg)
  1391. {
  1392. struct radix_tree_node *node = arg;
  1393. memset(node, 0, sizeof(*node));
  1394. INIT_LIST_HEAD(&node->private_list);
  1395. }
  1396. static int radix_tree_cpu_dead(unsigned int cpu)
  1397. {
  1398. struct radix_tree_preload *rtp;
  1399. struct radix_tree_node *node;
  1400. /* Free per-cpu pool of preloaded nodes */
  1401. rtp = &per_cpu(radix_tree_preloads, cpu);
  1402. while (rtp->nr) {
  1403. node = rtp->nodes;
  1404. rtp->nodes = node->parent;
  1405. kmem_cache_free(radix_tree_node_cachep, node);
  1406. rtp->nr--;
  1407. }
  1408. return 0;
  1409. }
  1410. void __init radix_tree_init(void)
  1411. {
  1412. int ret;
  1413. BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
  1414. BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
  1415. BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
  1416. radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
  1417. sizeof(struct radix_tree_node), 0,
  1418. SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
  1419. radix_tree_node_ctor);
  1420. ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
  1421. NULL, radix_tree_cpu_dead);
  1422. WARN_ON(ret < 0);
  1423. }