selftest_engine_pm.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright © 2018 Intel Corporation
  4. */
  5. #include <linux/sort.h>
  6. #include "gt/intel_gt_print.h"
  7. #include "i915_selftest.h"
  8. #include "intel_engine_regs.h"
  9. #include "intel_gpu_commands.h"
  10. #include "intel_gt_clock_utils.h"
  11. #include "selftest_engine.h"
  12. #include "selftest_engine_heartbeat.h"
  13. #include "selftests/igt_atomic.h"
  14. #include "selftests/igt_flush_test.h"
  15. #include "selftests/igt_spinner.h"
  16. #define COUNT 5
  /*
   * cmp_u64 - sort() comparator ordering two u64 values ascending.
   * @A: pointer to the first u64
   * @B: pointer to the second u64
   *
   * Returns a negative value, zero or a positive value when *A is
   * respectively less than, equal to or greater than *B.
   *
   * The values are compared explicitly instead of returning "*a - *b":
   * the u64 difference would be truncated to int, which yields the wrong
   * sign (or zero) whenever the operands differ by 2^31 or more.
   */
  static int cmp_u64(const void *A, const void *B)
  {
      const u64 *a = A, *b = B;

      if (*a < *b)
          return -1;
      if (*a > *b)
          return 1;
      return 0;
  }
  22. static u64 trifilter(u64 *a)
  23. {
  24. sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
  25. return (a[1] + 2 * a[2] + a[3]) >> 2;
  26. }
  27. static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
  28. {
  29. *cs++ = MI_SEMAPHORE_WAIT |
  30. MI_SEMAPHORE_GLOBAL_GTT |
  31. MI_SEMAPHORE_POLL |
  32. op;
  33. *cs++ = value;
  34. *cs++ = offset;
  35. *cs++ = 0;
  36. return cs;
  37. }
  38. static u32 *emit_store(u32 *cs, u32 offset, u32 value)
  39. {
  40. *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
  41. *cs++ = offset;
  42. *cs++ = 0;
  43. *cs++ = value;
  44. return cs;
  45. }
  46. static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
  47. {
  48. *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
  49. *cs++ = i915_mmio_reg_offset(reg);
  50. *cs++ = offset;
  51. *cs++ = 0;
  52. return cs;
  53. }
  54. static void write_semaphore(u32 *x, u32 value)
  55. {
  56. WRITE_ONCE(*x, value);
  57. wmb();
  58. }
  59. static int __measure_timestamps(struct intel_context *ce,
  60. u64 *dt, u64 *d_ring, u64 *d_ctx)
  61. {
  62. struct intel_engine_cs *engine = ce->engine;
  63. u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
  64. u32 offset = i915_ggtt_offset(engine->status_page.vma);
  65. struct i915_request *rq;
  66. u32 *cs;
  67. rq = intel_context_create_request(ce);
  68. if (IS_ERR(rq))
  69. return PTR_ERR(rq);
  70. cs = intel_ring_begin(rq, 28);
  71. if (IS_ERR(cs)) {
  72. i915_request_add(rq);
  73. return PTR_ERR(cs);
  74. }
  75. /* Signal & wait for start */
  76. cs = emit_store(cs, offset + 4008, 1);
  77. cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);
  78. cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
  79. cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);
  80. /* Busy wait */
  81. cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);
  82. cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
  83. cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);
  84. intel_ring_advance(rq, cs);
  85. i915_request_get(rq);
  86. i915_request_add(rq);
  87. intel_engine_flush_submission(engine);
  88. /* Wait for the request to start executing, that then waits for us */
  89. while (READ_ONCE(sema[2]) == 0)
  90. cpu_relax();
  91. /* Run the request for a 100us, sampling timestamps before/after */
  92. local_irq_disable();
  93. write_semaphore(&sema[2], 0);
  94. while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */
  95. cpu_relax();
  96. *dt = local_clock();
  97. udelay(100);
  98. *dt = local_clock() - *dt;
  99. write_semaphore(&sema[2], 1);
  100. local_irq_enable();
  101. if (i915_request_wait(rq, 0, HZ / 2) < 0) {
  102. i915_request_put(rq);
  103. return -ETIME;
  104. }
  105. i915_request_put(rq);
  106. pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
  107. engine->name, sema[1], sema[3], sema[0], sema[4]);
  108. *d_ctx = sema[3] - sema[1];
  109. *d_ring = sema[4] - sema[0];
  110. return 0;
  111. }
  112. static int __live_engine_timestamps(struct intel_engine_cs *engine)
  113. {
  114. u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
  115. struct intel_context *ce;
  116. int i, err = 0;
  117. ce = intel_context_create(engine);
  118. if (IS_ERR(ce))
  119. return PTR_ERR(ce);
  120. for (i = 0; i < COUNT; i++) {
  121. err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
  122. if (err)
  123. break;
  124. }
  125. intel_context_put(ce);
  126. if (err)
  127. return err;
  128. dt = trifilter(st);
  129. d_ring = trifilter(s_ring);
  130. d_ctx = trifilter(s_ctx);
  131. pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
  132. engine->name, dt,
  133. intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
  134. intel_gt_clock_interval_to_ns(engine->gt, d_ring));
  135. d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
  136. if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
  137. pr_err("%s Mismatch between ring timestamp and walltime!\n",
  138. engine->name);
  139. return -EINVAL;
  140. }
  141. d_ring = trifilter(s_ring);
  142. d_ctx = trifilter(s_ctx);
  143. d_ctx *= engine->gt->clock_frequency;
  144. if (GRAPHICS_VER(engine->i915) == 11)
  145. d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */
  146. else
  147. d_ring *= engine->gt->clock_frequency;
  148. if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
  149. pr_err("%s Mismatch between ring and context timestamps!\n",
  150. engine->name);
  151. return -EINVAL;
  152. }
  153. return 0;
  154. }
  155. static int live_engine_timestamps(void *arg)
  156. {
  157. struct intel_gt *gt = arg;
  158. struct intel_engine_cs *engine;
  159. enum intel_engine_id id;
  160. /*
  161. * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
  162. * the same CS clock.
  163. */
  164. if (GRAPHICS_VER(gt->i915) < 8)
  165. return 0;
  166. for_each_engine(engine, gt, id) {
  167. int err;
  168. st_engine_heartbeat_disable(engine);
  169. err = __live_engine_timestamps(engine);
  170. st_engine_heartbeat_enable(engine);
  171. if (err)
  172. return err;
  173. }
  174. return 0;
  175. }
  176. static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
  177. {
  178. ktime_t start, unused, dt;
  179. if (!intel_engine_uses_guc(engine))
  180. return 0;
  181. /*
  182. * In GuC mode of submission, the busyness stats may get updated after
  183. * the batch starts running. Poll for a change in busyness and timeout
  184. * after 500 us.
  185. */
  186. start = ktime_get();
  187. while (intel_engine_get_busy_time(engine, &unused) == busyness) {
  188. dt = ktime_get() - start;
  189. if (dt > 10000000) {
  190. pr_err("active wait timed out %lld\n", dt);
  191. ENGINE_TRACE(engine, "active wait time out %lld\n", dt);
  192. return -ETIME;
  193. }
  194. }
  195. return 0;
  196. }
  197. static int live_engine_busy_stats(void *arg)
  198. {
  199. struct intel_gt *gt = arg;
  200. struct intel_engine_cs *engine;
  201. enum intel_engine_id id;
  202. struct igt_spinner spin;
  203. int err = 0;
  204. /*
  205. * Check that if an engine supports busy-stats, they tell the truth.
  206. */
  207. if (igt_spinner_init(&spin, gt))
  208. return -ENOMEM;
  209. GEM_BUG_ON(intel_gt_pm_is_awake(gt));
  210. for_each_engine(engine, gt, id) {
  211. struct i915_request *rq;
  212. ktime_t busyness, dummy;
  213. ktime_t de, dt;
  214. ktime_t t[2];
  215. if (!intel_engine_supports_stats(engine))
  216. continue;
  217. if (!intel_engine_can_store_dword(engine))
  218. continue;
  219. if (intel_gt_pm_wait_for_idle(gt)) {
  220. err = -EBUSY;
  221. break;
  222. }
  223. st_engine_heartbeat_disable(engine);
  224. ENGINE_TRACE(engine, "measuring idle time\n");
  225. preempt_disable();
  226. de = intel_engine_get_busy_time(engine, &t[0]);
  227. udelay(100);
  228. de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
  229. preempt_enable();
  230. dt = ktime_sub(t[1], t[0]);
  231. if (de < 0 || de > 10) {
  232. pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
  233. engine->name,
  234. de, (int)div64_u64(100 * de, dt), dt);
  235. GEM_TRACE_DUMP();
  236. err = -EINVAL;
  237. goto end;
  238. }
  239. /* 100% busy */
  240. rq = igt_spinner_create_request(&spin,
  241. engine->kernel_context,
  242. MI_NOOP);
  243. if (IS_ERR(rq)) {
  244. err = PTR_ERR(rq);
  245. goto end;
  246. }
  247. i915_request_add(rq);
  248. busyness = intel_engine_get_busy_time(engine, &dummy);
  249. if (!igt_wait_for_spinner(&spin, rq)) {
  250. intel_gt_set_wedged(engine->gt);
  251. err = -ETIME;
  252. goto end;
  253. }
  254. err = __spin_until_busier(engine, busyness);
  255. if (err) {
  256. GEM_TRACE_DUMP();
  257. goto end;
  258. }
  259. ENGINE_TRACE(engine, "measuring busy time\n");
  260. preempt_disable();
  261. de = intel_engine_get_busy_time(engine, &t[0]);
  262. mdelay(100);
  263. de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
  264. preempt_enable();
  265. dt = ktime_sub(t[1], t[0]);
  266. if (100 * de < 95 * dt || 95 * de > 100 * dt) {
  267. pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
  268. engine->name,
  269. de, (int)div64_u64(100 * de, dt), dt);
  270. GEM_TRACE_DUMP();
  271. err = -EINVAL;
  272. goto end;
  273. }
  274. end:
  275. st_engine_heartbeat_enable(engine);
  276. igt_spinner_end(&spin);
  277. if (igt_flush_test(gt->i915))
  278. err = -EIO;
  279. if (err)
  280. break;
  281. }
  282. igt_spinner_fini(&spin);
  283. if (igt_flush_test(gt->i915))
  284. err = -EIO;
  285. return err;
  286. }
  287. static int live_engine_pm(void *arg)
  288. {
  289. struct intel_gt *gt = arg;
  290. struct intel_engine_cs *engine;
  291. enum intel_engine_id id;
  292. /*
  293. * Check we can call intel_engine_pm_put from any context. No
  294. * failures are reported directly, but if we mess up lockdep should
  295. * tell us.
  296. */
  297. if (intel_gt_pm_wait_for_idle(gt)) {
  298. pr_err("Unable to flush GT pm before test\n");
  299. return -EBUSY;
  300. }
  301. GEM_BUG_ON(intel_gt_pm_is_awake(gt));
  302. for_each_engine(engine, gt, id) {
  303. const typeof(*igt_atomic_phases) *p;
  304. for (p = igt_atomic_phases; p->name; p++) {
  305. /*
  306. * Acquisition is always synchronous, except if we
  307. * know that the engine is already awake, in which
  308. * case we should use intel_engine_pm_get_if_awake()
  309. * to atomically grab the wakeref.
  310. *
  311. * In practice,
  312. * intel_engine_pm_get();
  313. * intel_engine_pm_put();
  314. * occurs in one thread, while simultaneously
  315. * intel_engine_pm_get_if_awake();
  316. * intel_engine_pm_put();
  317. * occurs from atomic context in another.
  318. */
  319. GEM_BUG_ON(intel_engine_pm_is_awake(engine));
  320. intel_engine_pm_get(engine);
  321. p->critical_section_begin();
  322. if (!intel_engine_pm_get_if_awake(engine))
  323. pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
  324. engine->name, p->name);
  325. else
  326. intel_engine_pm_put_async(engine);
  327. intel_engine_pm_put_async(engine);
  328. p->critical_section_end();
  329. intel_engine_pm_flush(engine);
  330. if (intel_engine_pm_is_awake(engine)) {
  331. pr_err("%s is still awake after flushing pm\n",
  332. engine->name);
  333. return -EINVAL;
  334. }
  335. /* gt wakeref is async (deferred to workqueue) */
  336. if (intel_gt_pm_wait_for_idle(gt)) {
  337. gt_err(gt, "GT failed to idle\n");
  338. return -EINVAL;
  339. }
  340. }
  341. }
  342. return 0;
  343. }
  344. int live_engine_pm_selftests(struct intel_gt *gt)
  345. {
  346. static const struct i915_subtest tests[] = {
  347. SUBTEST(live_engine_timestamps),
  348. SUBTEST(live_engine_busy_stats),
  349. SUBTEST(live_engine_pm),
  350. };
  351. return intel_gt_live_subtests(tests, gt);
  352. }