i915_syncmap.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. * Copyright © 2017 Intel Corporation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. *
  23. */
  24. #include <linux/slab.h>
  25. #include "i915_syncmap.h"
  26. #include "i915_gem.h" /* GEM_BUG_ON() */
  27. #include "i915_selftest.h"
  28. #define SHIFT ilog2(KSYNCMAP)
  29. #define MASK (KSYNCMAP - 1)
  30. /*
  31. * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
  32. * context id to the last u32 fence seqno waited upon from that context.
  33. * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
  34. * the root. This allows us to access the whole tree via a single pointer
  35. * to the most recently used layer. We expect fence contexts to be dense
  36. * and most reuse to be on the same i915_gem_context but on neighbouring
  37. * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
  38. * effective lookup cache. If the new lookup is not on the same leaf, we
  39. * expect it to be on the neighbouring branch.
  40. *
  41. * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
  42. * allows us to store whether a particular seqno is valid (i.e. allows us
  43. * to distinguish unset from 0).
  44. *
  45. * A branch holds an array of layer pointers, and has height > 0, and always
  46. * has at least 2 layers (either branches or leaves) below it.
  47. *
  48. * For example,
  49. * for x in
  50. * 0 1 2 0x10 0x11 0x200 0x201
  51. * 0x500000 0x500001 0x503000 0x503001
  52. * 0xE<<60:
  53. * i915_syncmap_set(&sync, x, lower_32_bits(x));
  54. * will build a tree like:
  55. * 0xXXXXXXXXXXXXXXXX
  56. * 0-> 0x0000000000XXXXXX
  57. * | 0-> 0x0000000000000XXX
  58. * | | 0-> 0x00000000000000XX
  59. * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2
  60. * | | | 1-> 0x000000000000001X 0:10, 1:11
  61. * | | 2-> 0x000000000000020X 0:200, 1:201
  62. * | 5-> 0x000000000050XXXX
  63. * | 0-> 0x000000000050000X 0:500000, 1:500001
  64. * | 3-> 0x000000000050300X 0:503000, 1:503001
  65. * e-> 0xe00000000000000X e:e
  66. */
  67. struct i915_syncmap {
  68. u64 prefix;
  69. unsigned int height;
  70. unsigned int bitmap;
  71. struct i915_syncmap *parent;
  72. union {
  73. DECLARE_FLEX_ARRAY(u32, seqno);
  74. DECLARE_FLEX_ARRAY(struct i915_syncmap *, child);
  75. };
  76. };
  77. /**
  78. * i915_syncmap_init -- initialise the #i915_syncmap
  79. * @root: pointer to the #i915_syncmap
  80. */
  81. void i915_syncmap_init(struct i915_syncmap **root)
  82. {
  83. BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
  84. BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
  85. BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
  86. *root = NULL;
  87. }
  88. static inline u32 *__sync_seqno(struct i915_syncmap *p)
  89. {
  90. GEM_BUG_ON(p->height);
  91. return p->seqno;
  92. }
  93. static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
  94. {
  95. GEM_BUG_ON(!p->height);
  96. return p->child;
  97. }
  98. static inline unsigned int
  99. __sync_branch_idx(const struct i915_syncmap *p, u64 id)
  100. {
  101. return (id >> p->height) & MASK;
  102. }
  103. static inline unsigned int
  104. __sync_leaf_idx(const struct i915_syncmap *p, u64 id)
  105. {
  106. GEM_BUG_ON(p->height);
  107. return id & MASK;
  108. }
  109. static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
  110. {
  111. return id >> p->height >> SHIFT;
  112. }
  113. static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
  114. {
  115. GEM_BUG_ON(p->height);
  116. return id >> SHIFT;
  117. }
  118. static inline bool seqno_later(u32 a, u32 b)
  119. {
  120. return (s32)(a - b) >= 0;
  121. }
  122. /**
  123. * i915_syncmap_is_later -- compare against the last know sync point
  124. * @root: pointer to the #i915_syncmap
  125. * @id: the context id (other timeline) we are synchronising to
  126. * @seqno: the sequence number along the other timeline
  127. *
  128. * If we have already synchronised this @root timeline with another (@id) then
  129. * we can omit any repeated or earlier synchronisation requests. If the two
  130. * timelines are already coupled, we can also omit the dependency between the
  131. * two as that is already known via the timeline.
  132. *
  133. * Returns true if the two timelines are already synchronised wrt to @seqno,
  134. * false if not and the synchronisation must be emitted.
  135. */
  136. bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
  137. {
  138. struct i915_syncmap *p;
  139. unsigned int idx;
  140. p = *root;
  141. if (!p)
  142. return false;
  143. if (likely(__sync_leaf_prefix(p, id) == p->prefix))
  144. goto found;
  145. /* First climb the tree back to a parent branch */
  146. do {
  147. p = p->parent;
  148. if (!p)
  149. return false;
  150. if (__sync_branch_prefix(p, id) == p->prefix)
  151. break;
  152. } while (1);
  153. /* And then descend again until we find our leaf */
  154. do {
  155. if (!p->height)
  156. break;
  157. p = __sync_child(p)[__sync_branch_idx(p, id)];
  158. if (!p)
  159. return false;
  160. if (__sync_branch_prefix(p, id) != p->prefix)
  161. return false;
  162. } while (1);
  163. *root = p;
  164. found:
  165. idx = __sync_leaf_idx(p, id);
  166. if (!(p->bitmap & BIT(idx)))
  167. return false;
  168. return seqno_later(__sync_seqno(p)[idx], seqno);
  169. }
  170. static struct i915_syncmap *
  171. __sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
  172. {
  173. struct i915_syncmap *p;
  174. p = kmalloc_flex(*p, seqno, KSYNCMAP);
  175. if (unlikely(!p))
  176. return NULL;
  177. p->parent = parent;
  178. p->height = 0;
  179. p->bitmap = 0;
  180. p->prefix = __sync_leaf_prefix(p, id);
  181. return p;
  182. }
  183. static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
  184. {
  185. unsigned int idx = __sync_leaf_idx(p, id);
  186. p->bitmap |= BIT(idx);
  187. __sync_seqno(p)[idx] = seqno;
  188. }
  189. static inline void __sync_set_child(struct i915_syncmap *p,
  190. unsigned int idx,
  191. struct i915_syncmap *child)
  192. {
  193. p->bitmap |= BIT(idx);
  194. __sync_child(p)[idx] = child;
  195. }
  196. static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
  197. {
  198. struct i915_syncmap *p = *root;
  199. unsigned int idx;
  200. if (!p) {
  201. p = __sync_alloc_leaf(NULL, id);
  202. if (unlikely(!p))
  203. return -ENOMEM;
  204. goto found;
  205. }
  206. /* Caller handled the likely cached case */
  207. GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
  208. /* Climb back up the tree until we find a common prefix */
  209. do {
  210. if (!p->parent)
  211. break;
  212. p = p->parent;
  213. if (__sync_branch_prefix(p, id) == p->prefix)
  214. break;
  215. } while (1);
  216. /*
  217. * No shortcut, we have to descend the tree to find the right layer
  218. * containing this fence.
  219. *
  220. * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
  221. * or lower layers. Leaf nodes (height = 0) contain the fences, all
  222. * other nodes (height > 0) are internal layers that point to a lower
  223. * node. Each internal layer has at least 2 descendents.
  224. *
  225. * Starting at the top, we check whether the current prefix matches. If
  226. * it doesn't, we have gone past our target and need to insert a join
  227. * into the tree, and a new leaf node for the target as a descendent
  228. * of the join, as well as the original layer.
  229. *
  230. * The matching prefix means we are still following the right branch
  231. * of the tree. If it has height 0, we have found our leaf and just
  232. * need to replace the fence slot with ourselves. If the height is
  233. * not zero, our slot contains the next layer in the tree (unless
  234. * it is empty, in which case we can add ourselves as a new leaf).
  235. * As descend the tree the prefix grows (and height decreases).
  236. */
  237. do {
  238. struct i915_syncmap *next;
  239. if (__sync_branch_prefix(p, id) != p->prefix) {
  240. unsigned int above;
  241. /* Insert a join above the current layer */
  242. next = kzalloc_flex(*next, child, KSYNCMAP);
  243. if (unlikely(!next))
  244. return -ENOMEM;
  245. /* Compute the height at which these two diverge */
  246. above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
  247. above = round_up(above, SHIFT);
  248. next->height = above + p->height;
  249. next->prefix = __sync_branch_prefix(next, id);
  250. /* Insert the join into the parent */
  251. if (p->parent) {
  252. idx = __sync_branch_idx(p->parent, id);
  253. __sync_child(p->parent)[idx] = next;
  254. GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
  255. }
  256. next->parent = p->parent;
  257. /* Compute the idx of the other branch, not our id! */
  258. idx = p->prefix >> (above - SHIFT) & MASK;
  259. __sync_set_child(next, idx, p);
  260. p->parent = next;
  261. /* Ascend to the join */
  262. p = next;
  263. } else {
  264. if (!p->height)
  265. break;
  266. }
  267. /* Descend into the next layer */
  268. GEM_BUG_ON(!p->height);
  269. idx = __sync_branch_idx(p, id);
  270. next = __sync_child(p)[idx];
  271. if (!next) {
  272. next = __sync_alloc_leaf(p, id);
  273. if (unlikely(!next))
  274. return -ENOMEM;
  275. __sync_set_child(p, idx, next);
  276. p = next;
  277. break;
  278. }
  279. p = next;
  280. } while (1);
  281. found:
  282. GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
  283. __sync_set_seqno(p, id, seqno);
  284. *root = p;
  285. return 0;
  286. }
  287. /**
  288. * i915_syncmap_set -- mark the most recent syncpoint between contexts
  289. * @root: pointer to the #i915_syncmap
  290. * @id: the context id (other timeline) we have synchronised to
  291. * @seqno: the sequence number along the other timeline
  292. *
  293. * When we synchronise this @root timeline with another (@id), we also know
  294. * that we have synchronized with all previous seqno along that timeline. If
  295. * we then have a request to synchronise with the same seqno or older, we can
  296. * omit it, see i915_syncmap_is_later()
  297. *
  298. * Returns 0 on success, or a negative error code.
  299. */
  300. int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
  301. {
  302. struct i915_syncmap *p = *root;
  303. /*
  304. * We expect to be called in sequence following is_later(id), which
  305. * should have preloaded the root for us.
  306. */
  307. if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
  308. __sync_set_seqno(p, id, seqno);
  309. return 0;
  310. }
  311. return __sync_set(root, id, seqno);
  312. }
  313. static void __sync_free(struct i915_syncmap *p)
  314. {
  315. if (p->height) {
  316. unsigned int i;
  317. while ((i = ffs(p->bitmap))) {
  318. p->bitmap &= ~0u << i;
  319. __sync_free(__sync_child(p)[i - 1]);
  320. }
  321. }
  322. kfree(p);
  323. }
  324. /**
  325. * i915_syncmap_free -- free all memory associated with the syncmap
  326. * @root: pointer to the #i915_syncmap
  327. *
  328. * Either when the timeline is to be freed and we no longer need the sync
  329. * point tracking, or when the fences are all known to be signaled and the
  330. * sync point tracking is redundant, we can free the #i915_syncmap to recover
  331. * its allocations.
  332. *
  333. * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
  334. * reuse.
  335. */
  336. void i915_syncmap_free(struct i915_syncmap **root)
  337. {
  338. struct i915_syncmap *p;
  339. p = *root;
  340. if (!p)
  341. return;
  342. while (p->parent)
  343. p = p->parent;
  344. __sync_free(p);
  345. *root = NULL;
  346. }
  347. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  348. #include "selftests/i915_syncmap.c"
  349. #endif