i915_active.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189
  1. /*
  2. * SPDX-License-Identifier: MIT
  3. *
  4. * Copyright © 2019 Intel Corporation
  5. */
  6. #include <linux/debugobjects.h>
  7. #include "gt/intel_context.h"
  8. #include "gt/intel_engine_heartbeat.h"
  9. #include "gt/intel_engine_pm.h"
  10. #include "gt/intel_ring.h"
  11. #include "i915_drv.h"
  12. #include "i915_active.h"
  13. /*
  14. * Active refs memory management
  15. *
  16. * To be more economical with memory, we reap all the i915_active trees as
  17. * they idle (when we know the active requests are inactive) and allocate the
  18. * nodes from a local slab cache to hopefully reduce the fragmentation.
  19. */
  20. static struct kmem_cache *slab_cache;
  21. struct active_node {
  22. struct rb_node node;
  23. struct i915_active_fence base;
  24. struct i915_active *ref;
  25. u64 timeline;
  26. };
  /* Read an rb_node pointer exactly once and map it to its active_node. */
  #define fetch_node(x) \
      rb_entry(READ_ONCE(x), struct active_node, node)
  28. static inline struct active_node *
  29. node_from_active(struct i915_active_fence *active)
  30. {
  31. return container_of(active, struct active_node, base);
  32. }
  /* Detach and return the entire preallocated_barriers list of (x). */
  #define take_preallocated_barriers(x) \
      llist_del_all(&(x)->preallocated_barriers)
  34. static inline bool is_barrier(const struct i915_active_fence *active)
  35. {
  36. return IS_ERR(rcu_access_pointer(active->fence));
  37. }
  38. static inline struct llist_node *barrier_to_ll(struct active_node *node)
  39. {
  40. GEM_BUG_ON(!is_barrier(&node->base));
  41. return (struct llist_node *)&node->base.cb.node;
  42. }
  43. static inline struct intel_engine_cs *
  44. __barrier_to_engine(struct active_node *node)
  45. {
  46. return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
  47. }
  48. static inline struct intel_engine_cs *
  49. barrier_to_engine(struct active_node *node)
  50. {
  51. GEM_BUG_ON(!is_barrier(&node->base));
  52. return __barrier_to_engine(node);
  53. }
  54. static inline struct active_node *barrier_from_ll(struct llist_node *x)
  55. {
  56. return container_of((struct list_head *)x,
  57. struct active_node, base.cb.node);
  58. }
  59. #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
  60. static void *active_debug_hint(void *addr)
  61. {
  62. struct i915_active *ref = addr;
  63. return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
  64. }
  65. static const struct debug_obj_descr active_debug_desc = {
  66. .name = "i915_active",
  67. .debug_hint = active_debug_hint,
  68. };
  69. static void debug_active_init(struct i915_active *ref)
  70. {
  71. debug_object_init(ref, &active_debug_desc);
  72. }
  73. static void debug_active_activate(struct i915_active *ref)
  74. {
  75. lockdep_assert_held(&ref->tree_lock);
  76. debug_object_activate(ref, &active_debug_desc);
  77. }
  78. static void debug_active_deactivate(struct i915_active *ref)
  79. {
  80. lockdep_assert_held(&ref->tree_lock);
  81. if (!atomic_read(&ref->count)) /* after the last dec */
  82. debug_object_deactivate(ref, &active_debug_desc);
  83. }
  84. static void debug_active_fini(struct i915_active *ref)
  85. {
  86. debug_object_free(ref, &active_debug_desc);
  87. }
  88. static void debug_active_assert(struct i915_active *ref)
  89. {
  90. debug_object_assert_init(ref, &active_debug_desc);
  91. }
  92. #else
  93. static inline void debug_active_init(struct i915_active *ref) { }
  94. static inline void debug_active_activate(struct i915_active *ref) { }
  95. static inline void debug_active_deactivate(struct i915_active *ref) { }
  96. static inline void debug_active_fini(struct i915_active *ref) { }
  97. static inline void debug_active_assert(struct i915_active *ref) { }
  98. #endif
  99. static void
  100. __active_retire(struct i915_active *ref)
  101. {
  102. struct rb_root root = RB_ROOT;
  103. struct active_node *it, *n;
  104. unsigned long flags;
  105. GEM_BUG_ON(i915_active_is_idle(ref));
  106. /* return the unused nodes to our slabcache -- flushing the allocator */
  107. if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
  108. return;
  109. GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
  110. debug_active_deactivate(ref);
  111. /* Even if we have not used the cache, we may still have a barrier */
  112. if (!ref->cache)
  113. ref->cache = fetch_node(ref->tree.rb_node);
  114. /* Keep the MRU cached node for reuse */
  115. if (ref->cache) {
  116. /* Discard all other nodes in the tree */
  117. rb_erase(&ref->cache->node, &ref->tree);
  118. root = ref->tree;
  119. /* Rebuild the tree with only the cached node */
  120. rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
  121. rb_insert_color(&ref->cache->node, &ref->tree);
  122. GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
  123. /* Make the cached node available for reuse with any timeline */
  124. ref->cache->timeline = 0; /* needs cmpxchg(u64) */
  125. }
  126. spin_unlock_irqrestore(&ref->tree_lock, flags);
  127. /* After the final retire, the entire struct may be freed */
  128. if (ref->retire)
  129. ref->retire(ref);
  130. /* ... except if you wait on it, you must manage your own references! */
  131. wake_up_var(ref);
  132. /* Finally free the discarded timeline tree */
  133. rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
  134. GEM_BUG_ON(i915_active_fence_isset(&it->base));
  135. kmem_cache_free(slab_cache, it);
  136. }
  137. }
  138. static void
  139. active_work(struct work_struct *wrk)
  140. {
  141. struct i915_active *ref = container_of(wrk, typeof(*ref), work);
  142. GEM_BUG_ON(!atomic_read(&ref->count));
  143. if (atomic_add_unless(&ref->count, -1, 1))
  144. return;
  145. __active_retire(ref);
  146. }
  147. static void
  148. active_retire(struct i915_active *ref)
  149. {
  150. GEM_BUG_ON(!atomic_read(&ref->count));
  151. if (atomic_add_unless(&ref->count, -1, 1))
  152. return;
  153. if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
  154. queue_work(system_unbound_wq, &ref->work);
  155. return;
  156. }
  157. __active_retire(ref);
  158. }
  159. static inline struct dma_fence **
  160. __active_fence_slot(struct i915_active_fence *active)
  161. {
  162. return (struct dma_fence ** __force)&active->fence;
  163. }
  164. static inline bool
  165. active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
  166. {
  167. struct i915_active_fence *active =
  168. container_of(cb, typeof(*active), cb);
  169. return try_cmpxchg(__active_fence_slot(active), &fence, NULL);
  170. }
  171. static void
  172. node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
  173. {
  174. if (active_fence_cb(fence, cb))
  175. active_retire(container_of(cb, struct active_node, base.cb)->ref);
  176. }
  177. static void
  178. excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
  179. {
  180. if (active_fence_cb(fence, cb))
  181. active_retire(container_of(cb, struct i915_active, excl.cb));
  182. }
  183. static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
  184. {
  185. struct active_node *it;
  186. GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
  187. /*
  188. * We track the most recently used timeline to skip a rbtree search
  189. * for the common case, under typical loads we never need the rbtree
  190. * at all. We can reuse the last slot if it is empty, that is
  191. * after the previous activity has been retired, or if it matches the
  192. * current timeline.
  193. */
  194. it = READ_ONCE(ref->cache);
  195. if (it) {
  196. u64 cached = READ_ONCE(it->timeline);
  197. /* Once claimed, this slot will only belong to this idx */
  198. if (cached == idx)
  199. return it;
  200. /*
  201. * An unclaimed cache [.timeline=0] can only be claimed once.
  202. *
  203. * If the value is already non-zero, some other thread has
  204. * claimed the cache and we know that is does not match our
  205. * idx. If, and only if, the timeline is currently zero is it
  206. * worth competing to claim it atomically for ourselves (for
  207. * only the winner of that race will cmpxchg succeed).
  208. */
  209. if (!cached && try_cmpxchg64(&it->timeline, &cached, idx))
  210. return it;
  211. }
  212. BUILD_BUG_ON(offsetof(typeof(*it), node));
  213. /* While active, the tree can only be built; not destroyed */
  214. GEM_BUG_ON(i915_active_is_idle(ref));
  215. it = fetch_node(ref->tree.rb_node);
  216. while (it) {
  217. if (it->timeline < idx) {
  218. it = fetch_node(it->node.rb_right);
  219. } else if (it->timeline > idx) {
  220. it = fetch_node(it->node.rb_left);
  221. } else {
  222. WRITE_ONCE(ref->cache, it);
  223. break;
  224. }
  225. }
  226. /* NB: If the tree rotated beneath us, we may miss our target. */
  227. return it;
  228. }
  229. static struct i915_active_fence *
  230. active_instance(struct i915_active *ref, u64 idx)
  231. {
  232. struct active_node *node;
  233. struct rb_node **p, *parent;
  234. node = __active_lookup(ref, idx);
  235. if (likely(node))
  236. return &node->base;
  237. spin_lock_irq(&ref->tree_lock);
  238. GEM_BUG_ON(i915_active_is_idle(ref));
  239. parent = NULL;
  240. p = &ref->tree.rb_node;
  241. while (*p) {
  242. parent = *p;
  243. node = rb_entry(parent, struct active_node, node);
  244. if (node->timeline == idx)
  245. goto out;
  246. if (node->timeline < idx)
  247. p = &parent->rb_right;
  248. else
  249. p = &parent->rb_left;
  250. }
  251. /*
  252. * XXX: We should preallocate this before i915_active_ref() is ever
  253. * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
  254. */
  255. node = kmem_cache_alloc(slab_cache, GFP_ATOMIC);
  256. if (!node)
  257. goto out;
  258. __i915_active_fence_init(&node->base, NULL, node_retire);
  259. node->ref = ref;
  260. node->timeline = idx;
  261. rb_link_node(&node->node, parent, p);
  262. rb_insert_color(&node->node, &ref->tree);
  263. out:
  264. WRITE_ONCE(ref->cache, node);
  265. spin_unlock_irq(&ref->tree_lock);
  266. return &node->base;
  267. }
  268. void __i915_active_init(struct i915_active *ref,
  269. int (*active)(struct i915_active *ref),
  270. void (*retire)(struct i915_active *ref),
  271. unsigned long flags,
  272. struct lock_class_key *mkey,
  273. struct lock_class_key *wkey)
  274. {
  275. debug_active_init(ref);
  276. ref->flags = flags;
  277. ref->active = active;
  278. ref->retire = retire;
  279. spin_lock_init(&ref->tree_lock);
  280. ref->tree = RB_ROOT;
  281. ref->cache = NULL;
  282. init_llist_head(&ref->preallocated_barriers);
  283. atomic_set(&ref->count, 0);
  284. __mutex_init(&ref->mutex, "i915_active", mkey);
  285. __i915_active_fence_init(&ref->excl, NULL, excl_retire);
  286. INIT_WORK(&ref->work, active_work);
  287. #if IS_ENABLED(CONFIG_LOCKDEP)
  288. lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
  289. #endif
  290. }
  291. static bool ____active_del_barrier(struct i915_active *ref,
  292. struct active_node *node,
  293. struct intel_engine_cs *engine)
  294. {
  295. struct llist_node *head = NULL, *tail = NULL;
  296. struct llist_node *pos, *next;
  297. GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
  298. /*
  299. * Rebuild the llist excluding our node. We may perform this
  300. * outside of the kernel_context timeline mutex and so someone
  301. * else may be manipulating the engine->barrier_tasks, in
  302. * which case either we or they will be upset :)
  303. *
  304. * A second __active_del_barrier() will report failure to claim
  305. * the active_node and the caller will just shrug and know not to
  306. * claim ownership of its node.
  307. *
  308. * A concurrent i915_request_add_active_barriers() will miss adding
  309. * any of the tasks, but we will try again on the next -- and since
  310. * we are actively using the barrier, we know that there will be
  311. * at least another opportunity when we idle.
  312. */
  313. llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
  314. if (node == barrier_from_ll(pos)) {
  315. node = NULL;
  316. continue;
  317. }
  318. pos->next = head;
  319. head = pos;
  320. if (!tail)
  321. tail = pos;
  322. }
  323. if (head)
  324. llist_add_batch(head, tail, &engine->barrier_tasks);
  325. return !node;
  326. }
  327. static bool
  328. __active_del_barrier(struct i915_active *ref, struct active_node *node)
  329. {
  330. return ____active_del_barrier(ref, node, barrier_to_engine(node));
  331. }
  332. static bool
  333. replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
  334. {
  335. if (!is_barrier(active)) /* proto-node used by our idle barrier? */
  336. return false;
  337. /*
  338. * This request is on the kernel_context timeline, and so
  339. * we can use it to substitute for the pending idle-barrer
  340. * request that we want to emit on the kernel_context.
  341. */
  342. return __active_del_barrier(ref, node_from_active(active));
  343. }
  344. int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
  345. {
  346. u64 idx = i915_request_timeline(rq)->fence_context;
  347. struct dma_fence *fence = &rq->fence;
  348. struct i915_active_fence *active;
  349. int err;
  350. /* Prevent reaping in case we malloc/wait while building the tree */
  351. err = i915_active_acquire(ref);
  352. if (err)
  353. return err;
  354. do {
  355. active = active_instance(ref, idx);
  356. if (!active) {
  357. err = -ENOMEM;
  358. goto out;
  359. }
  360. if (replace_barrier(ref, active)) {
  361. RCU_INIT_POINTER(active->fence, NULL);
  362. atomic_dec(&ref->count);
  363. }
  364. } while (unlikely(is_barrier(active)));
  365. fence = __i915_active_fence_set(active, fence);
  366. if (!fence)
  367. __i915_active_acquire(ref);
  368. else
  369. dma_fence_put(fence);
  370. out:
  371. i915_active_release(ref);
  372. return err;
  373. }
  374. static struct dma_fence *
  375. __i915_active_set_fence(struct i915_active *ref,
  376. struct i915_active_fence *active,
  377. struct dma_fence *fence)
  378. {
  379. struct dma_fence *prev;
  380. if (replace_barrier(ref, active)) {
  381. RCU_INIT_POINTER(active->fence, fence);
  382. return NULL;
  383. }
  384. prev = __i915_active_fence_set(active, fence);
  385. if (!prev)
  386. __i915_active_acquire(ref);
  387. return prev;
  388. }
  389. struct dma_fence *
  390. i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
  391. {
  392. /* We expect the caller to manage the exclusive timeline ordering */
  393. return __i915_active_set_fence(ref, &ref->excl, f);
  394. }
  395. bool i915_active_acquire_if_busy(struct i915_active *ref)
  396. {
  397. debug_active_assert(ref);
  398. return atomic_add_unless(&ref->count, 1, 0);
  399. }
  400. static void __i915_active_activate(struct i915_active *ref)
  401. {
  402. spin_lock_irq(&ref->tree_lock); /* __active_retire() */
  403. if (!atomic_fetch_inc(&ref->count))
  404. debug_active_activate(ref);
  405. spin_unlock_irq(&ref->tree_lock);
  406. }
  407. int i915_active_acquire(struct i915_active *ref)
  408. {
  409. int err;
  410. if (i915_active_acquire_if_busy(ref))
  411. return 0;
  412. if (!ref->active) {
  413. __i915_active_activate(ref);
  414. return 0;
  415. }
  416. err = mutex_lock_interruptible(&ref->mutex);
  417. if (err)
  418. return err;
  419. if (likely(!i915_active_acquire_if_busy(ref))) {
  420. err = ref->active(ref);
  421. if (!err)
  422. __i915_active_activate(ref);
  423. }
  424. mutex_unlock(&ref->mutex);
  425. return err;
  426. }
  /* Drop a reference on @ref; dropping the last one retires the tracker. */
  void i915_active_release(struct i915_active *ref)
  {
      debug_active_assert(ref);

      active_retire(ref);
  }
  /*
   * Enable software signaling on the fence currently held by @active.
   * Barrier slots and empty slots are left alone.
   */
  static void enable_signaling(struct i915_active_fence *active)
  {
      struct dma_fence *fence;

      if (unlikely(is_barrier(active)))
          return;

      fence = i915_active_fence_get(active);
      if (fence) {
          dma_fence_enable_sw_signaling(fence);
          dma_fence_put(fence);
      }
  }
  443. static int flush_barrier(struct active_node *it)
  444. {
  445. struct intel_engine_cs *engine;
  446. if (likely(!is_barrier(&it->base)))
  447. return 0;
  448. engine = __barrier_to_engine(it);
  449. smp_rmb(); /* serialise with add_active_barriers */
  450. if (!is_barrier(&it->base))
  451. return 0;
  452. return intel_engine_flush_barriers(engine);
  453. }
  454. static int flush_lazy_signals(struct i915_active *ref)
  455. {
  456. struct active_node *it, *n;
  457. int err = 0;
  458. enable_signaling(&ref->excl);
  459. rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
  460. err = flush_barrier(it); /* unconnected idle barrier? */
  461. if (err)
  462. break;
  463. enable_signaling(&it->base);
  464. }
  465. return err;
  466. }
  467. int __i915_active_wait(struct i915_active *ref, int state)
  468. {
  469. might_sleep();
  470. /* Any fence added after the wait begins will not be auto-signaled */
  471. if (i915_active_acquire_if_busy(ref)) {
  472. int err;
  473. err = flush_lazy_signals(ref);
  474. i915_active_release(ref);
  475. if (err)
  476. return err;
  477. if (___wait_var_event(ref, i915_active_is_idle(ref),
  478. state, 0, 0, schedule()))
  479. return -EINTR;
  480. }
  481. /*
  482. * After the wait is complete, the caller may free the active.
  483. * We have to flush any concurrent retirement before returning.
  484. */
  485. flush_work(&ref->work);
  486. return 0;
  487. }
  /*
   * Call @fn(@arg, fence) on the fence currently held by @active, if any.
   * Barrier slots are skipped. Returns a negative error from @fn, else 0.
   */
  static int __await_active(struct i915_active_fence *active,
                            int (*fn)(void *arg, struct dma_fence *fence),
                            void *arg)
  {
      struct dma_fence *fence;
      int err;

      if (is_barrier(active)) /* XXX flush the barrier? */
          return 0;

      fence = i915_active_fence_get(active);
      if (!fence)
          return 0;

      err = fn(arg, fence);
      dma_fence_put(fence);

      /* Only negative results are treated as errors */
      return err < 0 ? err : 0;
  }
  505. struct wait_barrier {
  506. struct wait_queue_entry base;
  507. struct i915_active *ref;
  508. };
  509. static int
  510. barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
  511. {
  512. struct wait_barrier *wb = container_of(wq, typeof(*wb), base);
  513. if (i915_active_is_idle(wb->ref)) {
  514. list_del(&wq->entry);
  515. i915_sw_fence_complete(wq->private);
  516. kfree(wq);
  517. }
  518. return 0;
  519. }
  520. static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
  521. {
  522. struct wait_barrier *wb;
  523. wb = kmalloc_obj(*wb);
  524. if (unlikely(!wb))
  525. return -ENOMEM;
  526. GEM_BUG_ON(i915_active_is_idle(ref));
  527. if (!i915_sw_fence_await(fence)) {
  528. kfree(wb);
  529. return -EINVAL;
  530. }
  531. wb->base.flags = 0;
  532. wb->base.func = barrier_wake;
  533. wb->base.private = fence;
  534. wb->ref = ref;
  535. add_wait_queue(__var_waitqueue(ref), &wb->base);
  536. return 0;
  537. }
  538. static int await_active(struct i915_active *ref,
  539. unsigned int flags,
  540. int (*fn)(void *arg, struct dma_fence *fence),
  541. void *arg, struct i915_sw_fence *barrier)
  542. {
  543. int err = 0;
  544. if (!i915_active_acquire_if_busy(ref))
  545. return 0;
  546. if (flags & I915_ACTIVE_AWAIT_EXCL &&
  547. rcu_access_pointer(ref->excl.fence)) {
  548. err = __await_active(&ref->excl, fn, arg);
  549. if (err)
  550. goto out;
  551. }
  552. if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
  553. struct active_node *it, *n;
  554. rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
  555. err = __await_active(&it->base, fn, arg);
  556. if (err)
  557. goto out;
  558. }
  559. }
  560. if (flags & I915_ACTIVE_AWAIT_BARRIER) {
  561. err = flush_lazy_signals(ref);
  562. if (err)
  563. goto out;
  564. err = __await_barrier(ref, barrier);
  565. if (err)
  566. goto out;
  567. }
  568. out:
  569. i915_active_release(ref);
  570. return err;
  571. }
  /* await_active() adapter: @arg is passed on as the awaiting request. */
  static int rq_await_fence(void *arg, struct dma_fence *fence)
  {
      int err = i915_request_await_dma_fence(arg, fence);

      return err;
  }
  576. int i915_request_await_active(struct i915_request *rq,
  577. struct i915_active *ref,
  578. unsigned int flags)
  579. {
  580. return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
  581. }
  582. static int sw_await_fence(void *arg, struct dma_fence *fence)
  583. {
  584. return i915_sw_fence_await_dma_fence(arg, fence, 0,
  585. GFP_NOWAIT | __GFP_NOWARN);
  586. }
  587. int i915_sw_fence_await_active(struct i915_sw_fence *fence,
  588. struct i915_active *ref,
  589. unsigned int flags)
  590. {
  591. return await_active(ref, flags, sw_await_fence, fence, fence);
  592. }
  593. void i915_active_fini(struct i915_active *ref)
  594. {
  595. debug_active_fini(ref);
  596. GEM_BUG_ON(atomic_read(&ref->count));
  597. GEM_BUG_ON(work_pending(&ref->work));
  598. mutex_destroy(&ref->mutex);
  599. if (ref->cache)
  600. kmem_cache_free(slab_cache, ref->cache);
  601. }
  602. static inline bool is_idle_barrier(struct active_node *node, u64 idx)
  603. {
  604. return node->timeline == idx && !i915_active_fence_isset(&node->base);
  605. }
  606. static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
  607. {
  608. struct rb_node *prev, *p;
  609. if (RB_EMPTY_ROOT(&ref->tree))
  610. return NULL;
  611. GEM_BUG_ON(i915_active_is_idle(ref));
  612. /*
  613. * Try to reuse any existing barrier nodes already allocated for this
  614. * i915_active, due to overlapping active phases there is likely a
  615. * node kept alive (as we reuse before parking). We prefer to reuse
  616. * completely idle barriers (less hassle in manipulating the llists),
  617. * but otherwise any will do.
  618. */
  619. if (ref->cache && is_idle_barrier(ref->cache, idx)) {
  620. p = &ref->cache->node;
  621. goto match;
  622. }
  623. prev = NULL;
  624. p = ref->tree.rb_node;
  625. while (p) {
  626. struct active_node *node =
  627. rb_entry(p, struct active_node, node);
  628. if (is_idle_barrier(node, idx))
  629. goto match;
  630. prev = p;
  631. if (node->timeline < idx)
  632. p = READ_ONCE(p->rb_right);
  633. else
  634. p = READ_ONCE(p->rb_left);
  635. }
  636. /*
  637. * No quick match, but we did find the leftmost rb_node for the
  638. * kernel_context. Walk the rb_tree in-order to see if there were
  639. * any idle-barriers on this timeline that we missed, or just use
  640. * the first pending barrier.
  641. */
  642. for (p = prev; p; p = rb_next(p)) {
  643. struct active_node *node =
  644. rb_entry(p, struct active_node, node);
  645. struct intel_engine_cs *engine;
  646. if (node->timeline > idx)
  647. break;
  648. if (node->timeline < idx)
  649. continue;
  650. if (is_idle_barrier(node, idx))
  651. goto match;
  652. /*
  653. * The list of pending barriers is protected by the
  654. * kernel_context timeline, which notably we do not hold
  655. * here. i915_request_add_active_barriers() may consume
  656. * the barrier before we claim it, so we have to check
  657. * for success.
  658. */
  659. engine = __barrier_to_engine(node);
  660. smp_rmb(); /* serialise with add_active_barriers */
  661. if (is_barrier(&node->base) &&
  662. ____active_del_barrier(ref, node, engine))
  663. goto match;
  664. }
  665. return NULL;
  666. match:
  667. spin_lock_irq(&ref->tree_lock);
  668. rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
  669. if (p == &ref->cache->node)
  670. WRITE_ONCE(ref->cache, NULL);
  671. spin_unlock_irq(&ref->tree_lock);
  672. return rb_entry(p, struct active_node, node);
  673. }
  674. int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
  675. struct intel_engine_cs *engine)
  676. {
  677. intel_engine_mask_t tmp, mask = engine->mask;
  678. struct llist_node *first = NULL, *last = NULL;
  679. struct intel_gt *gt = engine->gt;
  680. GEM_BUG_ON(i915_active_is_idle(ref));
  681. /* Wait until the previous preallocation is completed */
  682. while (!llist_empty(&ref->preallocated_barriers))
  683. cond_resched();
  684. /*
  685. * Preallocate a node for each physical engine supporting the target
  686. * engine (remember virtual engines have more than one sibling).
  687. * We can then use the preallocated nodes in
  688. * i915_active_acquire_barrier()
  689. */
  690. GEM_BUG_ON(!mask);
  691. for_each_engine_masked(engine, gt, mask, tmp) {
  692. u64 idx = engine->kernel_context->timeline->fence_context;
  693. struct llist_node *prev = first;
  694. struct active_node *node;
  695. rcu_read_lock();
  696. node = reuse_idle_barrier(ref, idx);
  697. rcu_read_unlock();
  698. if (!node) {
  699. node = kmem_cache_alloc(slab_cache, GFP_KERNEL);
  700. if (!node)
  701. goto unwind;
  702. RCU_INIT_POINTER(node->base.fence, NULL);
  703. node->base.cb.func = node_retire;
  704. node->timeline = idx;
  705. node->ref = ref;
  706. }
  707. if (!i915_active_fence_isset(&node->base)) {
  708. /*
  709. * Mark this as being *our* unconnected proto-node.
  710. *
  711. * Since this node is not in any list, and we have
  712. * decoupled it from the rbtree, we can reuse the
  713. * request to indicate this is an idle-barrier node
  714. * and then we can use the rb_node and list pointers
  715. * for our tracking of the pending barrier.
  716. */
  717. RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
  718. node->base.cb.node.prev = (void *)engine;
  719. __i915_active_acquire(ref);
  720. }
  721. GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
  722. GEM_BUG_ON(barrier_to_engine(node) != engine);
  723. first = barrier_to_ll(node);
  724. first->next = prev;
  725. if (!last)
  726. last = first;
  727. intel_engine_pm_get(engine);
  728. }
  729. GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
  730. llist_add_batch(first, last, &ref->preallocated_barriers);
  731. return 0;
  732. unwind:
  733. while (first) {
  734. struct active_node *node = barrier_from_ll(first);
  735. first = first->next;
  736. atomic_dec(&ref->count);
  737. intel_engine_pm_put(barrier_to_engine(node));
  738. kmem_cache_free(slab_cache, node);
  739. }
  740. return -ENOMEM;
  741. }
  742. void i915_active_acquire_barrier(struct i915_active *ref)
  743. {
  744. struct llist_node *pos, *next;
  745. unsigned long flags;
  746. GEM_BUG_ON(i915_active_is_idle(ref));
  747. /*
  748. * Transfer the list of preallocated barriers into the
  749. * i915_active rbtree, but only as proto-nodes. They will be
  750. * populated by i915_request_add_active_barriers() to point to the
  751. * request that will eventually release them.
  752. */
  753. llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
  754. struct active_node *node = barrier_from_ll(pos);
  755. struct intel_engine_cs *engine = barrier_to_engine(node);
  756. struct rb_node **p, *parent;
  757. spin_lock_irqsave_nested(&ref->tree_lock, flags,
  758. SINGLE_DEPTH_NESTING);
  759. parent = NULL;
  760. p = &ref->tree.rb_node;
  761. while (*p) {
  762. struct active_node *it;
  763. parent = *p;
  764. it = rb_entry(parent, struct active_node, node);
  765. if (it->timeline < node->timeline)
  766. p = &parent->rb_right;
  767. else
  768. p = &parent->rb_left;
  769. }
  770. rb_link_node(&node->node, parent, p);
  771. rb_insert_color(&node->node, &ref->tree);
  772. spin_unlock_irqrestore(&ref->tree_lock, flags);
  773. GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
  774. llist_add(barrier_to_ll(node), &engine->barrier_tasks);
  775. intel_engine_pm_put_delay(engine, 2);
  776. }
  777. }
  778. static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
  779. {
  780. return __active_fence_slot(&barrier_from_ll(node)->base);
  781. }
  782. void i915_request_add_active_barriers(struct i915_request *rq)
  783. {
  784. struct intel_engine_cs *engine = rq->engine;
  785. struct llist_node *node, *next;
  786. unsigned long flags;
  787. GEM_BUG_ON(!intel_context_is_barrier(rq->context));
  788. GEM_BUG_ON(intel_engine_is_virtual(engine));
  789. GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
  790. node = llist_del_all(&engine->barrier_tasks);
  791. if (!node)
  792. return;
  793. /*
  794. * Attach the list of proto-fences to the in-flight request such
  795. * that the parent i915_active will be released when this request
  796. * is retired.
  797. */
  798. spin_lock_irqsave(&rq->lock, flags);
  799. llist_for_each_safe(node, next, node) {
  800. /* serialise with reuse_idle_barrier */
  801. smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
  802. list_add_tail((struct list_head *)node, &rq->fence.cb_list);
  803. }
  804. spin_unlock_irqrestore(&rq->lock, flags);
  805. }
  806. /*
  807. * __i915_active_fence_set: Update the last active fence along its timeline
  808. * @active: the active tracker
  809. * @fence: the new fence (under construction)
  810. *
  811. * Records the new @fence as the last active fence along its timeline in
  812. * this active tracker, moving the tracking callbacks from the previous
  813. * fence onto this one. Gets and returns a reference to the previous fence
  814. * (if not already completed), which the caller must put after making sure
  815. * that it is executed before the new fence. To ensure that the order of
  816. * fences within the timeline of the i915_active_fence is understood, it
  817. * should be locked by the caller.
  818. */
  819. struct dma_fence *
  820. __i915_active_fence_set(struct i915_active_fence *active,
  821. struct dma_fence *fence)
  822. {
  823. struct dma_fence *prev;
  824. unsigned long flags;
  825. /*
  826. * In case of fences embedded in i915_requests, their memory is
  827. * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
  828. * by new requests. Then, there is a risk of passing back a pointer
  829. * to a new, completely unrelated fence that reuses the same memory
  830. * while tracked under a different active tracker. Combined with i915
  831. * perf open/close operations that build await dependencies between
  832. * engine kernel context requests and user requests from different
  833. * timelines, this can lead to dependency loops and infinite waits.
  834. *
  835. * As a countermeasure, we try to get a reference to the active->fence
  836. * first, so if we succeed and pass it back to our user then it is not
  837. * released and potentially reused by an unrelated request before the
  838. * user has a chance to set up an await dependency on it.
  839. */
  840. prev = i915_active_fence_get(active);
  841. if (fence == prev)
  842. return fence;
  843. GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
  844. /*
  845. * Consider that we have two threads arriving (A and B), with
  846. * C already resident as the active->fence.
  847. *
  848. * Both A and B have got a reference to C or NULL, depending on the
  849. * timing of the interrupt handler. Let's assume that if A has got C
  850. * then it has locked C first (before B).
  851. *
  852. * Note the strong ordering of the timeline also provides consistent
  853. * nesting rules for the fence->lock; the inner lock is always the
  854. * older lock.
  855. */
  856. spin_lock_irqsave(fence->lock, flags);
  857. if (prev)
  858. spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
  859. /*
  860. * A does the cmpxchg first, and so it sees C or NULL, as before, or
  861. * something else, depending on the timing of other threads and/or
  862. * interrupt handler. If not the same as before then A unlocks C if
  863. * applicable and retries, starting from an attempt to get a new
  864. * active->fence. Meanwhile, B follows the same path as A.
  865. * Once A succeeds with cmpxch, B fails again, retires, gets A from
  866. * active->fence, locks it as soon as A completes, and possibly
  867. * succeeds with cmpxchg.
  868. */
  869. while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
  870. if (prev) {
  871. spin_unlock(prev->lock);
  872. dma_fence_put(prev);
  873. }
  874. spin_unlock_irqrestore(fence->lock, flags);
  875. prev = i915_active_fence_get(active);
  876. GEM_BUG_ON(prev == fence);
  877. spin_lock_irqsave(fence->lock, flags);
  878. if (prev)
  879. spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
  880. }
  881. /*
  882. * If prev is NULL then the previous fence must have been signaled
  883. * and we know that we are first on the timeline. If it is still
  884. * present then, having the lock on that fence already acquired, we
  885. * serialise with the interrupt handler, in the process of removing it
  886. * from any future interrupt callback. A will then wait on C before
  887. * executing (if present).
  888. *
  889. * As B is second, it sees A as the previous fence and so waits for
  890. * it to complete its transition and takes over the occupancy for
  891. * itself -- remembering that it needs to wait on A before executing.
  892. */
  893. if (prev) {
  894. __list_del_entry(&active->cb.node);
  895. spin_unlock(prev->lock); /* serialise with prev->cb_list */
  896. }
  897. list_add_tail(&active->cb.node, &fence->cb_list);
  898. spin_unlock_irqrestore(fence->lock, flags);
  899. return prev;
  900. }
  901. int i915_active_fence_set(struct i915_active_fence *active,
  902. struct i915_request *rq)
  903. {
  904. struct dma_fence *fence;
  905. int err = 0;
  906. /* Must maintain timeline ordering wrt previous active requests */
  907. fence = __i915_active_fence_set(active, &rq->fence);
  908. if (fence) {
  909. err = i915_request_await_dma_fence(rq, fence);
  910. dma_fence_put(fence);
  911. }
  912. return err;
  913. }
  914. void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
  915. {
  916. active_fence_cb(fence, cb);
  917. }
  918. struct auto_active {
  919. struct i915_active base;
  920. struct kref ref;
  921. };
  922. struct i915_active *i915_active_get(struct i915_active *ref)
  923. {
  924. struct auto_active *aa = container_of(ref, typeof(*aa), base);
  925. kref_get(&aa->ref);
  926. return &aa->base;
  927. }
  928. static void auto_release(struct kref *ref)
  929. {
  930. struct auto_active *aa = container_of(ref, typeof(*aa), ref);
  931. i915_active_fini(&aa->base);
  932. kfree(aa);
  933. }
  934. void i915_active_put(struct i915_active *ref)
  935. {
  936. struct auto_active *aa = container_of(ref, typeof(*aa), base);
  937. kref_put(&aa->ref, auto_release);
  938. }
  939. static int auto_active(struct i915_active *ref)
  940. {
  941. i915_active_get(ref);
  942. return 0;
  943. }
  944. static void auto_retire(struct i915_active *ref)
  945. {
  946. i915_active_put(ref);
  947. }
  948. struct i915_active *i915_active_create(void)
  949. {
  950. struct auto_active *aa;
  951. aa = kmalloc_obj(*aa);
  952. if (!aa)
  953. return NULL;
  954. kref_init(&aa->ref);
  955. i915_active_init(&aa->base, auto_active, auto_retire, 0);
  956. return &aa->base;
  957. }
  958. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  959. #include "selftests/i915_active.c"
  960. #endif
  961. void i915_active_module_exit(void)
  962. {
  963. kmem_cache_destroy(slab_cache);
  964. }
  965. int __init i915_active_module_init(void)
  966. {
  967. slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
  968. if (!slab_cache)
  969. return -ENOMEM;
  970. return 0;
  971. }