intel_context.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. * Copyright © 2019 Intel Corporation
  4. */
  5. #include "gem/i915_gem_context.h"
  6. #include "gem/i915_gem_pm.h"
  7. #include "i915_drm_client.h"
  8. #include "i915_drv.h"
  9. #include "i915_trace.h"
  10. #include "intel_context.h"
  11. #include "intel_engine.h"
  12. #include "intel_engine_pm.h"
  13. #include "intel_ring.h"
  14. static struct kmem_cache *slab_ce;
  15. static struct intel_context *intel_context_alloc(void)
  16. {
  17. return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
  18. }
  19. static void rcu_context_free(struct rcu_head *rcu)
  20. {
  21. struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
  22. trace_intel_context_free(ce);
  23. if (intel_context_has_own_state(ce))
  24. fput(ce->default_state);
  25. kmem_cache_free(slab_ce, ce);
  26. }
  27. void intel_context_free(struct intel_context *ce)
  28. {
  29. call_rcu(&ce->rcu, rcu_context_free);
  30. }
  31. struct intel_context *
  32. intel_context_create(struct intel_engine_cs *engine)
  33. {
  34. struct intel_context *ce;
  35. ce = intel_context_alloc();
  36. if (!ce)
  37. return ERR_PTR(-ENOMEM);
  38. intel_context_init(ce, engine);
  39. trace_intel_context_create(ce);
  40. return ce;
  41. }
  42. int intel_context_alloc_state(struct intel_context *ce)
  43. {
  44. struct i915_gem_context *ctx;
  45. int err = 0;
  46. if (mutex_lock_interruptible(&ce->pin_mutex))
  47. return -EINTR;
  48. if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
  49. if (intel_context_is_banned(ce)) {
  50. err = -EIO;
  51. goto unlock;
  52. }
  53. err = ce->ops->alloc(ce);
  54. if (unlikely(err))
  55. goto unlock;
  56. set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
  57. rcu_read_lock();
  58. ctx = rcu_dereference(ce->gem_context);
  59. if (ctx && !kref_get_unless_zero(&ctx->ref))
  60. ctx = NULL;
  61. rcu_read_unlock();
  62. if (ctx) {
  63. if (ctx->client)
  64. i915_drm_client_add_context_objects(ctx->client,
  65. ce);
  66. i915_gem_context_put(ctx);
  67. }
  68. }
  69. unlock:
  70. mutex_unlock(&ce->pin_mutex);
  71. return err;
  72. }
  73. static int intel_context_active_acquire(struct intel_context *ce)
  74. {
  75. int err;
  76. __i915_active_acquire(&ce->active);
  77. if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
  78. intel_context_is_parallel(ce))
  79. return 0;
  80. /* Preallocate tracking nodes */
  81. err = i915_active_acquire_preallocate_barrier(&ce->active,
  82. ce->engine);
  83. if (err)
  84. i915_active_release(&ce->active);
  85. return err;
  86. }
  87. static void intel_context_active_release(struct intel_context *ce)
  88. {
  89. /* Nodes preallocated in intel_context_active() */
  90. i915_active_acquire_barrier(&ce->active);
  91. i915_active_release(&ce->active);
  92. }
  93. static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
  94. {
  95. unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
  96. int err;
  97. err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
  98. if (err)
  99. return err;
  100. err = i915_active_acquire(&vma->active);
  101. if (err)
  102. goto err_unpin;
  103. /*
  104. * And mark it as a globally pinned object to let the shrinker know
  105. * it cannot reclaim the object until we release it.
  106. */
  107. i915_vma_make_unshrinkable(vma);
  108. vma->obj->mm.dirty = true;
  109. return 0;
  110. err_unpin:
  111. i915_vma_unpin(vma);
  112. return err;
  113. }
  114. static void __context_unpin_state(struct i915_vma *vma)
  115. {
  116. i915_vma_make_shrinkable(vma);
  117. i915_active_release(&vma->active);
  118. __i915_vma_unpin(vma);
  119. }
  120. static int __ring_active(struct intel_ring *ring,
  121. struct i915_gem_ww_ctx *ww)
  122. {
  123. int err;
  124. err = intel_ring_pin(ring, ww);
  125. if (err)
  126. return err;
  127. err = i915_active_acquire(&ring->vma->active);
  128. if (err)
  129. goto err_pin;
  130. return 0;
  131. err_pin:
  132. intel_ring_unpin(ring);
  133. return err;
  134. }
  135. static void __ring_retire(struct intel_ring *ring)
  136. {
  137. i915_active_release(&ring->vma->active);
  138. intel_ring_unpin(ring);
  139. }
  140. static int intel_context_pre_pin(struct intel_context *ce,
  141. struct i915_gem_ww_ctx *ww)
  142. {
  143. int err;
  144. CE_TRACE(ce, "active\n");
  145. err = __ring_active(ce->ring, ww);
  146. if (err)
  147. return err;
  148. err = intel_timeline_pin(ce->timeline, ww);
  149. if (err)
  150. goto err_ring;
  151. if (!ce->state)
  152. return 0;
  153. err = __context_pin_state(ce->state, ww);
  154. if (err)
  155. goto err_timeline;
  156. return 0;
  157. err_timeline:
  158. intel_timeline_unpin(ce->timeline);
  159. err_ring:
  160. __ring_retire(ce->ring);
  161. return err;
  162. }
  163. static void intel_context_post_unpin(struct intel_context *ce)
  164. {
  165. if (ce->state)
  166. __context_unpin_state(ce->state);
  167. intel_timeline_unpin(ce->timeline);
  168. __ring_retire(ce->ring);
  169. }
  170. int __intel_context_do_pin_ww(struct intel_context *ce,
  171. struct i915_gem_ww_ctx *ww)
  172. {
  173. bool handoff = false;
  174. void *vaddr;
  175. int err = 0;
  176. if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
  177. err = intel_context_alloc_state(ce);
  178. if (err)
  179. return err;
  180. }
  181. /*
  182. * We always pin the context/ring/timeline here, to ensure a pin
  183. * refcount for __intel_context_active(), which prevent a lock
  184. * inversion of ce->pin_mutex vs dma_resv_lock().
  185. */
  186. err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
  187. if (!err)
  188. err = i915_gem_object_lock(ce->ring->vma->obj, ww);
  189. if (!err && ce->state)
  190. err = i915_gem_object_lock(ce->state->obj, ww);
  191. if (!err)
  192. err = intel_context_pre_pin(ce, ww);
  193. if (err)
  194. return err;
  195. err = ce->ops->pre_pin(ce, ww, &vaddr);
  196. if (err)
  197. goto err_ctx_unpin;
  198. err = i915_active_acquire(&ce->active);
  199. if (err)
  200. goto err_post_unpin;
  201. err = mutex_lock_interruptible(&ce->pin_mutex);
  202. if (err)
  203. goto err_release;
  204. intel_engine_pm_might_get(ce->engine);
  205. if (unlikely(intel_context_is_closed(ce))) {
  206. err = -ENOENT;
  207. goto err_unlock;
  208. }
  209. if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
  210. err = intel_context_active_acquire(ce);
  211. if (unlikely(err))
  212. goto err_unlock;
  213. err = ce->ops->pin(ce, vaddr);
  214. if (err) {
  215. intel_context_active_release(ce);
  216. goto err_unlock;
  217. }
  218. CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
  219. i915_ggtt_offset(ce->ring->vma),
  220. ce->ring->head, ce->ring->tail);
  221. handoff = true;
  222. smp_mb__before_atomic(); /* flush pin before it is visible */
  223. atomic_inc(&ce->pin_count);
  224. }
  225. GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
  226. trace_intel_context_do_pin(ce);
  227. err_unlock:
  228. mutex_unlock(&ce->pin_mutex);
  229. err_release:
  230. i915_active_release(&ce->active);
  231. err_post_unpin:
  232. if (!handoff)
  233. ce->ops->post_unpin(ce);
  234. err_ctx_unpin:
  235. intel_context_post_unpin(ce);
  236. /*
  237. * Unlock the hwsp_ggtt object since it's shared.
  238. * In principle we can unlock all the global state locked above
  239. * since it's pinned and doesn't need fencing, and will
  240. * thus remain resident until it is explicitly unpinned.
  241. */
  242. i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);
  243. return err;
  244. }
  245. int __intel_context_do_pin(struct intel_context *ce)
  246. {
  247. struct i915_gem_ww_ctx ww;
  248. int err;
  249. i915_gem_ww_ctx_init(&ww, true);
  250. retry:
  251. err = __intel_context_do_pin_ww(ce, &ww);
  252. if (err == -EDEADLK) {
  253. err = i915_gem_ww_ctx_backoff(&ww);
  254. if (!err)
  255. goto retry;
  256. }
  257. i915_gem_ww_ctx_fini(&ww);
  258. return err;
  259. }
  260. void __intel_context_do_unpin(struct intel_context *ce, int sub)
  261. {
  262. if (!atomic_sub_and_test(sub, &ce->pin_count))
  263. return;
  264. CE_TRACE(ce, "unpin\n");
  265. ce->ops->unpin(ce);
  266. ce->ops->post_unpin(ce);
  267. /*
  268. * Once released, we may asynchronously drop the active reference.
  269. * As that may be the only reference keeping the context alive,
  270. * take an extra now so that it is not freed before we finish
  271. * dereferencing it.
  272. */
  273. intel_context_get(ce);
  274. intel_context_active_release(ce);
  275. trace_intel_context_do_unpin(ce);
  276. intel_context_put(ce);
  277. }
  278. static void __intel_context_retire(struct i915_active *active)
  279. {
  280. struct intel_context *ce = container_of(active, typeof(*ce), active);
  281. CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
  282. intel_context_get_total_runtime_ns(ce),
  283. intel_context_get_avg_runtime_ns(ce));
  284. set_bit(CONTEXT_VALID_BIT, &ce->flags);
  285. intel_context_post_unpin(ce);
  286. intel_context_put(ce);
  287. }
  288. static int __intel_context_active(struct i915_active *active)
  289. {
  290. struct intel_context *ce = container_of(active, typeof(*ce), active);
  291. intel_context_get(ce);
  292. /* everything should already be activated by intel_context_pre_pin() */
  293. GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
  294. __intel_ring_pin(ce->ring);
  295. __intel_timeline_pin(ce->timeline);
  296. if (ce->state) {
  297. GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
  298. __i915_vma_pin(ce->state);
  299. i915_vma_make_unshrinkable(ce->state);
  300. }
  301. return 0;
  302. }
  303. static int
  304. sw_fence_dummy_notify(struct i915_sw_fence *sf,
  305. enum i915_sw_fence_notify state)
  306. {
  307. return NOTIFY_DONE;
  308. }
  309. void
  310. intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
  311. {
  312. GEM_BUG_ON(!engine->cops);
  313. GEM_BUG_ON(!engine->gt->vm);
  314. kref_init(&ce->ref);
  315. ce->engine = engine;
  316. ce->ops = engine->cops;
  317. ce->sseu = engine->sseu;
  318. ce->ring = NULL;
  319. ce->ring_size = SZ_4K;
  320. ewma_runtime_init(&ce->stats.runtime.avg);
  321. ce->vm = i915_vm_get(engine->gt->vm);
  322. /* NB ce->signal_link/lock is used under RCU */
  323. spin_lock_init(&ce->signal_lock);
  324. INIT_LIST_HEAD(&ce->signals);
  325. mutex_init(&ce->pin_mutex);
  326. spin_lock_init(&ce->guc_state.lock);
  327. INIT_LIST_HEAD(&ce->guc_state.fences);
  328. INIT_LIST_HEAD(&ce->guc_state.requests);
  329. ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
  330. INIT_LIST_HEAD(&ce->guc_id.link);
  331. INIT_LIST_HEAD(&ce->destroyed_link);
  332. INIT_LIST_HEAD(&ce->parallel.child_list);
  333. /*
  334. * Initialize fence to be complete as this is expected to be complete
  335. * unless there is a pending schedule disable outstanding.
  336. */
  337. i915_sw_fence_init(&ce->guc_state.blocked,
  338. sw_fence_dummy_notify);
  339. i915_sw_fence_commit(&ce->guc_state.blocked);
  340. i915_active_init(&ce->active,
  341. __intel_context_active, __intel_context_retire, 0);
  342. }
  343. void intel_context_fini(struct intel_context *ce)
  344. {
  345. struct intel_context *child, *next;
  346. if (ce->timeline)
  347. intel_timeline_put(ce->timeline);
  348. i915_vm_put(ce->vm);
  349. /* Need to put the creation ref for the children */
  350. if (intel_context_is_parent(ce))
  351. for_each_child_safe(ce, child, next)
  352. intel_context_put(child);
  353. mutex_destroy(&ce->pin_mutex);
  354. i915_active_fini(&ce->active);
  355. i915_sw_fence_fini(&ce->guc_state.blocked);
  356. }
  357. void i915_context_module_exit(void)
  358. {
  359. kmem_cache_destroy(slab_ce);
  360. }
  361. int __init i915_context_module_init(void)
  362. {
  363. slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
  364. if (!slab_ce)
  365. return -ENOMEM;
  366. return 0;
  367. }
  368. void intel_context_enter_engine(struct intel_context *ce)
  369. {
  370. intel_engine_pm_get(ce->engine);
  371. intel_timeline_enter(ce->timeline);
  372. }
  373. void intel_context_exit_engine(struct intel_context *ce)
  374. {
  375. intel_timeline_exit(ce->timeline);
  376. intel_engine_pm_put(ce->engine);
  377. }
  378. int intel_context_prepare_remote_request(struct intel_context *ce,
  379. struct i915_request *rq)
  380. {
  381. struct intel_timeline *tl = ce->timeline;
  382. int err;
  383. /* Only suitable for use in remotely modifying this context */
  384. GEM_BUG_ON(rq->context == ce);
  385. if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
  386. /* Queue this switch after current activity by this context. */
  387. err = i915_active_fence_set(&tl->last_request, rq);
  388. if (err)
  389. return err;
  390. }
  391. /*
  392. * Guarantee context image and the timeline remains pinned until the
  393. * modifying request is retired by setting the ce activity tracker.
  394. *
  395. * But we only need to take one pin on the account of it. Or in other
  396. * words transfer the pinned ce object to tracked active request.
  397. */
  398. GEM_BUG_ON(i915_active_is_idle(&ce->active));
  399. return i915_active_add_request(&ce->active, rq);
  400. }
  401. struct i915_request *intel_context_create_request(struct intel_context *ce)
  402. {
  403. struct i915_gem_ww_ctx ww;
  404. struct i915_request *rq;
  405. int err;
  406. i915_gem_ww_ctx_init(&ww, true);
  407. retry:
  408. err = intel_context_pin_ww(ce, &ww);
  409. if (!err) {
  410. rq = i915_request_create(ce);
  411. intel_context_unpin(ce);
  412. } else if (err == -EDEADLK) {
  413. err = i915_gem_ww_ctx_backoff(&ww);
  414. if (!err)
  415. goto retry;
  416. rq = ERR_PTR(err);
  417. } else {
  418. rq = ERR_PTR(err);
  419. }
  420. i915_gem_ww_ctx_fini(&ww);
  421. if (IS_ERR(rq))
  422. return rq;
  423. /*
  424. * timeline->mutex should be the inner lock, but is used as outer lock.
  425. * Hack around this to shut up lockdep in selftests..
  426. */
  427. lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
  428. mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
  429. mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
  430. rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
  431. return rq;
  432. }
  433. struct i915_request *intel_context_get_active_request(struct intel_context *ce)
  434. {
  435. struct intel_context *parent = intel_context_to_parent(ce);
  436. struct i915_request *rq, *active = NULL;
  437. unsigned long flags;
  438. GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
  439. /*
  440. * We search the parent list to find an active request on the submitted
  441. * context. The parent list contains the requests for all the contexts
  442. * in the relationship so we have to do a compare of each request's
  443. * context.
  444. */
  445. spin_lock_irqsave(&parent->guc_state.lock, flags);
  446. list_for_each_entry_reverse(rq, &parent->guc_state.requests,
  447. sched.link) {
  448. if (rq->context != ce)
  449. continue;
  450. if (i915_request_completed(rq))
  451. break;
  452. active = rq;
  453. }
  454. if (active)
  455. active = i915_request_get_rcu(active);
  456. spin_unlock_irqrestore(&parent->guc_state.lock, flags);
  457. return active;
  458. }
  459. void intel_context_bind_parent_child(struct intel_context *parent,
  460. struct intel_context *child)
  461. {
  462. /*
  463. * Callers responsibility to validate that this function is used
  464. * correctly but we use GEM_BUG_ON here ensure that they do.
  465. */
  466. GEM_BUG_ON(intel_context_is_pinned(parent));
  467. GEM_BUG_ON(intel_context_is_child(parent));
  468. GEM_BUG_ON(intel_context_is_pinned(child));
  469. GEM_BUG_ON(intel_context_is_child(child));
  470. GEM_BUG_ON(intel_context_is_parent(child));
  471. parent->parallel.child_index = parent->parallel.number_children++;
  472. list_add_tail(&child->parallel.child_link,
  473. &parent->parallel.child_list);
  474. child->parallel.parent = parent;
  475. }
  476. u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
  477. {
  478. u64 total, active;
  479. if (ce->ops->update_stats)
  480. ce->ops->update_stats(ce);
  481. total = ce->stats.runtime.total;
  482. if (ce->ops->flags & COPS_RUNTIME_CYCLES)
  483. total *= ce->engine->gt->clock_period_ns;
  484. active = READ_ONCE(ce->stats.active);
  485. if (active)
  486. active = intel_context_clock() - active;
  487. return total + active;
  488. }
  489. u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
  490. {
  491. u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);
  492. if (ce->ops->flags & COPS_RUNTIME_CYCLES)
  493. avg *= ce->engine->gt->clock_period_ns;
  494. return avg;
  495. }
  496. bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
  497. {
  498. bool ret = intel_context_set_banned(ce);
  499. trace_intel_context_ban(ce);
  500. if (ce->ops->revoke)
  501. ce->ops->revoke(ce, rq,
  502. INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);
  503. return ret;
  504. }
  505. bool intel_context_revoke(struct intel_context *ce)
  506. {
  507. bool ret = intel_context_set_exiting(ce);
  508. if (ce->ops->revoke)
  509. ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);
  510. return ret;
  511. }
  512. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  513. #include "selftest_context.c"
  514. #endif