intel_breadcrumbs.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. * Copyright © 2015-2021 Intel Corporation
  4. */
  5. #include <linux/kthread.h>
  6. #include <linux/string_helpers.h>
  7. #include <trace/events/dma_fence.h>
  8. #include <uapi/linux/sched/types.h>
  9. #include <drm/drm_print.h>
  10. #include "i915_drv.h"
  11. #include "i915_trace.h"
  12. #include "intel_breadcrumbs.h"
  13. #include "intel_context.h"
  14. #include "intel_engine_pm.h"
  15. #include "intel_gt_pm.h"
  16. #include "intel_gt_requests.h"
  17. static bool irq_enable(struct intel_breadcrumbs *b)
  18. {
  19. return intel_engine_irq_enable(b->irq_engine);
  20. }
  21. static void irq_disable(struct intel_breadcrumbs *b)
  22. {
  23. intel_engine_irq_disable(b->irq_engine);
  24. }
  25. static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
  26. {
  27. intel_wakeref_t wakeref;
  28. /*
  29. * Since we are waiting on a request, the GPU should be busy
  30. * and should have its own rpm reference.
  31. */
  32. wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
  33. if (GEM_WARN_ON(!wakeref))
  34. return;
  35. /*
  36. * The breadcrumb irq will be disarmed on the interrupt after the
  37. * waiters are signaled. This gives us a single interrupt window in
  38. * which we can add a new waiter and avoid the cost of re-enabling
  39. * the irq.
  40. */
  41. WRITE_ONCE(b->irq_armed, wakeref);
  42. /* Requests may have completed before we could enable the interrupt. */
  43. if (!b->irq_enabled++ && b->irq_enable(b))
  44. irq_work_queue(&b->irq_work);
  45. }
  46. static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
  47. {
  48. if (!b->irq_engine)
  49. return;
  50. spin_lock(&b->irq_lock);
  51. if (!b->irq_armed)
  52. __intel_breadcrumbs_arm_irq(b);
  53. spin_unlock(&b->irq_lock);
  54. }
  55. static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
  56. {
  57. intel_wakeref_t wakeref = b->irq_armed;
  58. GEM_BUG_ON(!b->irq_enabled);
  59. if (!--b->irq_enabled)
  60. b->irq_disable(b);
  61. WRITE_ONCE(b->irq_armed, NULL);
  62. intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
  63. }
  64. static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
  65. {
  66. spin_lock(&b->irq_lock);
  67. if (b->irq_armed)
  68. __intel_breadcrumbs_disarm_irq(b);
  69. spin_unlock(&b->irq_lock);
  70. }
  71. static void add_signaling_context(struct intel_breadcrumbs *b,
  72. struct intel_context *ce)
  73. {
  74. lockdep_assert_held(&ce->signal_lock);
  75. spin_lock(&b->signalers_lock);
  76. list_add_rcu(&ce->signal_link, &b->signalers);
  77. spin_unlock(&b->signalers_lock);
  78. }
  79. static bool remove_signaling_context(struct intel_breadcrumbs *b,
  80. struct intel_context *ce)
  81. {
  82. lockdep_assert_held(&ce->signal_lock);
  83. if (!list_empty(&ce->signals))
  84. return false;
  85. spin_lock(&b->signalers_lock);
  86. list_del_rcu(&ce->signal_link);
  87. spin_unlock(&b->signalers_lock);
  88. return true;
  89. }
  90. __maybe_unused static bool
  91. check_signal_order(struct intel_context *ce, struct i915_request *rq)
  92. {
  93. if (rq->context != ce)
  94. return false;
  95. if (!list_is_last(&rq->signal_link, &ce->signals) &&
  96. i915_seqno_passed(rq->fence.seqno,
  97. list_next_entry(rq, signal_link)->fence.seqno))
  98. return false;
  99. if (!list_is_first(&rq->signal_link, &ce->signals) &&
  100. i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
  101. rq->fence.seqno))
  102. return false;
  103. return true;
  104. }
  105. static bool
  106. __dma_fence_signal(struct dma_fence *fence)
  107. {
  108. return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
  109. }
  110. static void
  111. __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
  112. {
  113. fence->timestamp = timestamp;
  114. set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
  115. trace_dma_fence_signaled(fence);
  116. }
  117. static void
  118. __dma_fence_signal__notify(struct dma_fence *fence,
  119. const struct list_head *list)
  120. {
  121. struct dma_fence_cb *cur, *tmp;
  122. lockdep_assert_held(fence->lock);
  123. list_for_each_entry_safe(cur, tmp, list, node) {
  124. INIT_LIST_HEAD(&cur->node);
  125. cur->func(fence, cur);
  126. }
  127. }
  128. static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
  129. {
  130. if (b->irq_engine)
  131. intel_engine_add_retire(b->irq_engine, tl);
  132. }
  133. static struct llist_node *
  134. slist_add(struct llist_node *node, struct llist_node *head)
  135. {
  136. node->next = head;
  137. return node;
  138. }
  139. static void signal_irq_work(struct irq_work *work)
  140. {
  141. struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
  142. const ktime_t timestamp = ktime_get();
  143. struct llist_node *signal, *sn;
  144. struct intel_context *ce;
  145. signal = NULL;
  146. if (unlikely(!llist_empty(&b->signaled_requests)))
  147. signal = llist_del_all(&b->signaled_requests);
  148. /*
  149. * Keep the irq armed until the interrupt after all listeners are gone.
  150. *
  151. * Enabling/disabling the interrupt is rather costly, roughly a couple
  152. * of hundred microseconds. If we are proactive and enable/disable
  153. * the interrupt around every request that wants a breadcrumb, we
  154. * quickly drown in the extra orders of magnitude of latency imposed
  155. * on request submission.
  156. *
  157. * So we try to be lazy, and keep the interrupts enabled until no
  158. * more listeners appear within a breadcrumb interrupt interval (that
  159. * is until a request completes that no one cares about). The
  160. * observation is that listeners come in batches, and will often
  161. * listen to a bunch of requests in succession. Though note on icl+,
  162. * interrupts are always enabled due to concerns with rc6 being
  163. * dysfunctional with per-engine interrupt masking.
  164. *
  165. * We also try to avoid raising too many interrupts, as they may
  166. * be generated by userspace batches and it is unfortunately rather
  167. * too easy to drown the CPU under a flood of GPU interrupts. Thus
  168. * whenever no one appears to be listening, we turn off the interrupts.
  169. * Fewer interrupts should conserve power -- at the very least, fewer
  170. * interrupt draw less ire from other users of the system and tools
  171. * like powertop.
  172. */
  173. if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
  174. intel_breadcrumbs_disarm_irq(b);
  175. rcu_read_lock();
  176. atomic_inc(&b->signaler_active);
  177. list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
  178. struct i915_request *rq;
  179. list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
  180. bool release;
  181. if (!__i915_request_is_complete(rq))
  182. break;
  183. if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
  184. &rq->fence.flags))
  185. break;
  186. /*
  187. * Queue for execution after dropping the signaling
  188. * spinlock as the callback chain may end up adding
  189. * more signalers to the same context or engine.
  190. */
  191. spin_lock(&ce->signal_lock);
  192. list_del_rcu(&rq->signal_link);
  193. release = remove_signaling_context(b, ce);
  194. spin_unlock(&ce->signal_lock);
  195. if (release) {
  196. if (intel_timeline_is_last(ce->timeline, rq))
  197. add_retire(b, ce->timeline);
  198. intel_context_put(ce);
  199. }
  200. if (__dma_fence_signal(&rq->fence))
  201. /* We own signal_node now, xfer to local list */
  202. signal = slist_add(&rq->signal_node, signal);
  203. else
  204. i915_request_put(rq);
  205. }
  206. }
  207. atomic_dec(&b->signaler_active);
  208. rcu_read_unlock();
  209. llist_for_each_safe(signal, sn, signal) {
  210. struct i915_request *rq =
  211. llist_entry(signal, typeof(*rq), signal_node);
  212. struct list_head cb_list;
  213. if (rq->engine->sched_engine->retire_inflight_request_prio)
  214. rq->engine->sched_engine->retire_inflight_request_prio(rq);
  215. spin_lock(&rq->lock);
  216. list_replace(&rq->fence.cb_list, &cb_list);
  217. __dma_fence_signal__timestamp(&rq->fence, timestamp);
  218. __dma_fence_signal__notify(&rq->fence, &cb_list);
  219. spin_unlock(&rq->lock);
  220. i915_request_put(rq);
  221. }
  222. /* Lazy irq enabling after HW submission */
  223. if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
  224. intel_breadcrumbs_arm_irq(b);
  225. /* And confirm that we still want irqs enabled before we yield */
  226. if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
  227. intel_breadcrumbs_disarm_irq(b);
  228. }
  229. struct intel_breadcrumbs *
  230. intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
  231. {
  232. struct intel_breadcrumbs *b;
  233. b = kzalloc_obj(*b);
  234. if (!b)
  235. return NULL;
  236. kref_init(&b->ref);
  237. spin_lock_init(&b->signalers_lock);
  238. INIT_LIST_HEAD(&b->signalers);
  239. init_llist_head(&b->signaled_requests);
  240. spin_lock_init(&b->irq_lock);
  241. init_irq_work(&b->irq_work, signal_irq_work);
  242. b->irq_engine = irq_engine;
  243. b->irq_enable = irq_enable;
  244. b->irq_disable = irq_disable;
  245. return b;
  246. }
  247. void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
  248. {
  249. unsigned long flags;
  250. if (!b->irq_engine)
  251. return;
  252. spin_lock_irqsave(&b->irq_lock, flags);
  253. if (b->irq_enabled)
  254. b->irq_enable(b);
  255. else
  256. b->irq_disable(b);
  257. spin_unlock_irqrestore(&b->irq_lock, flags);
  258. }
  259. void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
  260. {
  261. if (!READ_ONCE(b->irq_armed))
  262. return;
  263. /* Kick the work once more to drain the signalers, and disarm the irq */
  264. irq_work_queue(&b->irq_work);
  265. }
  266. void intel_breadcrumbs_free(struct kref *kref)
  267. {
  268. struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);
  269. irq_work_sync(&b->irq_work);
  270. GEM_BUG_ON(!list_empty(&b->signalers));
  271. GEM_BUG_ON(b->irq_armed);
  272. kfree(b);
  273. }
  274. static void irq_signal_request(struct i915_request *rq,
  275. struct intel_breadcrumbs *b)
  276. {
  277. if (!__dma_fence_signal(&rq->fence))
  278. return;
  279. i915_request_get(rq);
  280. if (llist_add(&rq->signal_node, &b->signaled_requests))
  281. irq_work_queue(&b->irq_work);
  282. }
  283. static void insert_breadcrumb(struct i915_request *rq)
  284. {
  285. struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
  286. struct intel_context *ce = rq->context;
  287. struct list_head *pos;
  288. if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
  289. return;
  290. /*
  291. * If the request is already completed, we can transfer it
  292. * straight onto a signaled list, and queue the irq worker for
  293. * its signal completion.
  294. */
  295. if (__i915_request_is_complete(rq)) {
  296. irq_signal_request(rq, b);
  297. return;
  298. }
  299. if (list_empty(&ce->signals)) {
  300. intel_context_get(ce);
  301. add_signaling_context(b, ce);
  302. pos = &ce->signals;
  303. } else {
  304. /*
  305. * We keep the seqno in retirement order, so we can break
  306. * inside intel_engine_signal_breadcrumbs as soon as we've
  307. * passed the last completed request (or seen a request that
  308. * hasn't event started). We could walk the timeline->requests,
  309. * but keeping a separate signalers_list has the advantage of
  310. * hopefully being much smaller than the full list and so
  311. * provides faster iteration and detection when there are no
  312. * more interrupts required for this context.
  313. *
  314. * We typically expect to add new signalers in order, so we
  315. * start looking for our insertion point from the tail of
  316. * the list.
  317. */
  318. list_for_each_prev(pos, &ce->signals) {
  319. struct i915_request *it =
  320. list_entry(pos, typeof(*it), signal_link);
  321. if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
  322. break;
  323. }
  324. }
  325. i915_request_get(rq);
  326. list_add_rcu(&rq->signal_link, pos);
  327. GEM_BUG_ON(!check_signal_order(ce, rq));
  328. GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
  329. set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
  330. /*
  331. * Defer enabling the interrupt to after HW submission and recheck
  332. * the request as it may have completed and raised the interrupt as
  333. * we were attaching it into the lists.
  334. */
  335. if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
  336. irq_work_queue(&b->irq_work);
  337. }
  338. bool i915_request_enable_breadcrumb(struct i915_request *rq)
  339. {
  340. struct intel_context *ce = rq->context;
  341. /* Serialises with i915_request_retire() using rq->lock */
  342. if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
  343. return true;
  344. /*
  345. * Peek at i915_request_submit()/i915_request_unsubmit() status.
  346. *
  347. * If the request is not yet active (and not signaled), we will
  348. * attach the breadcrumb later.
  349. */
  350. if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
  351. return true;
  352. spin_lock(&ce->signal_lock);
  353. if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
  354. insert_breadcrumb(rq);
  355. spin_unlock(&ce->signal_lock);
  356. return true;
  357. }
  358. void i915_request_cancel_breadcrumb(struct i915_request *rq)
  359. {
  360. struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
  361. struct intel_context *ce = rq->context;
  362. bool release;
  363. spin_lock(&ce->signal_lock);
  364. if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
  365. spin_unlock(&ce->signal_lock);
  366. return;
  367. }
  368. list_del_rcu(&rq->signal_link);
  369. release = remove_signaling_context(b, ce);
  370. spin_unlock(&ce->signal_lock);
  371. if (release)
  372. intel_context_put(ce);
  373. if (__i915_request_is_complete(rq))
  374. irq_signal_request(rq, b);
  375. i915_request_put(rq);
  376. }
  377. void intel_context_remove_breadcrumbs(struct intel_context *ce,
  378. struct intel_breadcrumbs *b)
  379. {
  380. struct i915_request *rq, *rn;
  381. bool release = false;
  382. unsigned long flags;
  383. spin_lock_irqsave(&ce->signal_lock, flags);
  384. if (list_empty(&ce->signals))
  385. goto unlock;
  386. list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
  387. GEM_BUG_ON(!__i915_request_is_complete(rq));
  388. if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
  389. &rq->fence.flags))
  390. continue;
  391. list_del_rcu(&rq->signal_link);
  392. irq_signal_request(rq, b);
  393. i915_request_put(rq);
  394. }
  395. release = remove_signaling_context(b, ce);
  396. unlock:
  397. spin_unlock_irqrestore(&ce->signal_lock, flags);
  398. if (release)
  399. intel_context_put(ce);
  400. while (atomic_read(&b->signaler_active))
  401. cpu_relax();
  402. }
  403. static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
  404. {
  405. struct intel_context *ce;
  406. struct i915_request *rq;
  407. drm_printf(p, "Signals:\n");
  408. rcu_read_lock();
  409. list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
  410. list_for_each_entry_rcu(rq, &ce->signals, signal_link)
  411. drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
  412. rq->fence.context, rq->fence.seqno,
  413. __i915_request_is_complete(rq) ? "!" :
  414. __i915_request_has_started(rq) ? "*" :
  415. "",
  416. jiffies_to_msecs(jiffies - rq->emitted_jiffies));
  417. }
  418. rcu_read_unlock();
  419. }
  420. void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
  421. struct drm_printer *p)
  422. {
  423. struct intel_breadcrumbs *b;
  424. b = engine->breadcrumbs;
  425. if (!b)
  426. return;
  427. drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
  428. if (!list_empty(&b->signalers))
  429. print_signals(b, p);
  430. }