i915_perf.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /*
  2. * SPDX-License-Identifier: MIT
  3. *
  4. * Copyright © 2019 Intel Corporation
  5. */
  6. #include <linux/kref.h>
  7. #include "gem/i915_gem_pm.h"
  8. #include "gt/intel_gt.h"
  9. #include "i915_selftest.h"
  10. #include "igt_flush_test.h"
  11. #include "lib_sw_fence.h"
  /*
   * UUID stored in the OA config created by alloc_empty_config(); the
   * lookup helpers in this file find that config again by comparing uuids
   * against this string.
   */
  12. #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
  13. static int
  14. alloc_empty_config(struct i915_perf *perf)
  15. {
  16. struct i915_oa_config *oa_config;
  17. oa_config = kzalloc_obj(*oa_config);
  18. if (!oa_config)
  19. return -ENOMEM;
  20. oa_config->perf = perf;
  21. kref_init(&oa_config->ref);
  22. strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
  23. mutex_lock(&perf->metrics_lock);
  24. oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
  25. if (oa_config->id < 0) {
  26. mutex_unlock(&perf->metrics_lock);
  27. i915_oa_config_put(oa_config);
  28. return -ENOMEM;
  29. }
  30. mutex_unlock(&perf->metrics_lock);
  31. return 0;
  32. }
  33. static void
  34. destroy_empty_config(struct i915_perf *perf)
  35. {
  36. struct i915_oa_config *oa_config = NULL, *tmp;
  37. int id;
  38. mutex_lock(&perf->metrics_lock);
  39. idr_for_each_entry(&perf->metrics_idr, tmp, id) {
  40. if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
  41. oa_config = tmp;
  42. break;
  43. }
  44. }
  45. if (oa_config)
  46. idr_remove(&perf->metrics_idr, oa_config->id);
  47. mutex_unlock(&perf->metrics_lock);
  48. if (oa_config)
  49. i915_oa_config_put(oa_config);
  50. }
  51. static struct i915_oa_config *
  52. get_empty_config(struct i915_perf *perf)
  53. {
  54. struct i915_oa_config *oa_config = NULL, *tmp;
  55. int id;
  56. mutex_lock(&perf->metrics_lock);
  57. idr_for_each_entry(&perf->metrics_idr, tmp, id) {
  58. if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
  59. oa_config = i915_oa_config_get(tmp);
  60. break;
  61. }
  62. }
  63. mutex_unlock(&perf->metrics_lock);
  64. return oa_config;
  65. }
  66. static struct i915_perf_stream *
  67. test_stream(struct i915_perf *perf)
  68. {
  69. struct drm_i915_perf_open_param param = {};
  70. struct i915_oa_config *oa_config = get_empty_config(perf);
  71. struct perf_open_properties props = {
  72. .engine = intel_engine_lookup_user(perf->i915,
  73. I915_ENGINE_CLASS_RENDER,
  74. 0),
  75. .sample_flags = SAMPLE_OA_REPORT,
  76. .oa_format = GRAPHICS_VER(perf->i915) == 12 ?
  77. I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
  78. };
  79. struct i915_perf_stream *stream;
  80. struct intel_gt *gt;
  81. if (!props.engine)
  82. return NULL;
  83. gt = props.engine->gt;
  84. if (!oa_config)
  85. return NULL;
  86. props.metrics_set = oa_config->id;
  87. stream = kzalloc_obj(*stream);
  88. if (!stream) {
  89. i915_oa_config_put(oa_config);
  90. return NULL;
  91. }
  92. stream->perf = perf;
  93. mutex_lock(&gt->perf.lock);
  94. if (i915_oa_stream_init(stream, &param, &props)) {
  95. kfree(stream);
  96. stream = NULL;
  97. }
  98. mutex_unlock(&gt->perf.lock);
  99. i915_oa_config_put(oa_config);
  100. return stream;
  101. }
  102. static void stream_destroy(struct i915_perf_stream *stream)
  103. {
  104. struct intel_gt *gt = stream->engine->gt;
  105. mutex_lock(&gt->perf.lock);
  106. i915_perf_destroy_locked(stream);
  107. mutex_unlock(&gt->perf.lock);
  108. }
  109. static int live_sanitycheck(void *arg)
  110. {
  111. struct drm_i915_private *i915 = arg;
  112. struct i915_perf_stream *stream;
  113. /* Quick check we can create a perf stream */
  114. stream = test_stream(&i915->perf);
  115. if (!stream)
  116. return -EINVAL;
  117. stream_destroy(stream);
  118. return 0;
  119. }
  120. static int write_timestamp(struct i915_request *rq, int slot)
  121. {
  122. u32 *cs;
  123. int len;
  124. cs = intel_ring_begin(rq, 6);
  125. if (IS_ERR(cs))
  126. return PTR_ERR(cs);
  127. len = 5;
  128. if (GRAPHICS_VER(rq->i915) >= 8)
  129. len++;
  130. *cs++ = GFX_OP_PIPE_CONTROL(len);
  131. *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
  132. PIPE_CONTROL_STORE_DATA_INDEX |
  133. PIPE_CONTROL_WRITE_TIMESTAMP;
  134. *cs++ = slot * sizeof(u32);
  135. *cs++ = 0;
  136. *cs++ = 0;
  137. *cs++ = 0;
  138. intel_ring_advance(rq, cs);
  139. return 0;
  140. }
  141. static ktime_t poll_status(struct i915_request *rq, int slot)
  142. {
  143. while (!intel_read_status_page(rq->engine, slot) &&
  144. !i915_request_completed(rq))
  145. cpu_relax();
  146. return ktime_get();
  147. }
  148. static int live_noa_delay(void *arg)
  149. {
  150. struct drm_i915_private *i915 = arg;
  151. struct i915_perf_stream *stream;
  152. struct i915_request *rq;
  153. ktime_t t0, t1;
  154. u64 expected;
  155. u32 delay;
  156. int err;
  157. int i;
  158. /* Check that the GPU delays matches expectations */
  159. stream = test_stream(&i915->perf);
  160. if (!stream)
  161. return -ENOMEM;
  162. expected = atomic64_read(&stream->perf->noa_programming_delay);
  163. if (stream->engine->class != RENDER_CLASS) {
  164. err = -ENODEV;
  165. goto out;
  166. }
  167. for (i = 0; i < 4; i++)
  168. intel_write_status_page(stream->engine, 0x100 + i, 0);
  169. rq = intel_engine_create_kernel_request(stream->engine);
  170. if (IS_ERR(rq)) {
  171. err = PTR_ERR(rq);
  172. goto out;
  173. }
  174. if (rq->engine->emit_init_breadcrumb) {
  175. err = rq->engine->emit_init_breadcrumb(rq);
  176. if (err) {
  177. i915_request_add(rq);
  178. goto out;
  179. }
  180. }
  181. err = write_timestamp(rq, 0x100);
  182. if (err) {
  183. i915_request_add(rq);
  184. goto out;
  185. }
  186. err = rq->engine->emit_bb_start(rq,
  187. i915_ggtt_offset(stream->noa_wait), 0,
  188. I915_DISPATCH_SECURE);
  189. if (err) {
  190. i915_request_add(rq);
  191. goto out;
  192. }
  193. err = write_timestamp(rq, 0x102);
  194. if (err) {
  195. i915_request_add(rq);
  196. goto out;
  197. }
  198. i915_request_get(rq);
  199. i915_request_add(rq);
  200. preempt_disable();
  201. t0 = poll_status(rq, 0x100);
  202. t1 = poll_status(rq, 0x102);
  203. preempt_enable();
  204. pr_info("CPU delay: %lluns, expected %lluns\n",
  205. ktime_sub(t1, t0), expected);
  206. delay = intel_read_status_page(stream->engine, 0x102);
  207. delay -= intel_read_status_page(stream->engine, 0x100);
  208. delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
  209. pr_info("GPU delay: %uns, expected %lluns\n",
  210. delay, expected);
  211. if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
  212. pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
  213. delay / 1000,
  214. div_u64(3 * expected, 4000),
  215. div_u64(3 * expected, 2000));
  216. err = -EINVAL;
  217. }
  218. i915_request_put(rq);
  219. out:
  220. stream_destroy(stream);
  221. return err;
  222. }
  223. static int live_noa_gpr(void *arg)
  224. {
  225. struct drm_i915_private *i915 = arg;
  226. struct i915_perf_stream *stream;
  227. struct intel_context *ce;
  228. struct i915_request *rq;
  229. u32 *cs, *store;
  230. void *scratch;
  231. u32 gpr0;
  232. int err;
  233. int i;
  234. /* Check that the delay does not clobber user context state (GPR) */
  235. stream = test_stream(&i915->perf);
  236. if (!stream)
  237. return -ENOMEM;
  238. gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));
  239. ce = intel_context_create(stream->engine);
  240. if (IS_ERR(ce)) {
  241. err = PTR_ERR(ce);
  242. goto out;
  243. }
  244. /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
  245. scratch = __px_vaddr(ce->vm->scratch[0]);
  246. memset(scratch, POISON_FREE, PAGE_SIZE);
  247. rq = intel_context_create_request(ce);
  248. if (IS_ERR(rq)) {
  249. err = PTR_ERR(rq);
  250. goto out_ce;
  251. }
  252. i915_request_get(rq);
  253. if (rq->engine->emit_init_breadcrumb) {
  254. err = rq->engine->emit_init_breadcrumb(rq);
  255. if (err) {
  256. i915_request_add(rq);
  257. goto out_rq;
  258. }
  259. }
  260. /* Fill the 16 qword [32 dword] GPR with a known unlikely value */
  261. cs = intel_ring_begin(rq, 2 * 32 + 2);
  262. if (IS_ERR(cs)) {
  263. err = PTR_ERR(cs);
  264. i915_request_add(rq);
  265. goto out_rq;
  266. }
  267. *cs++ = MI_LOAD_REGISTER_IMM(32);
  268. for (i = 0; i < 32; i++) {
  269. *cs++ = gpr0 + i * sizeof(u32);
  270. *cs++ = STACK_MAGIC;
  271. }
  272. *cs++ = MI_NOOP;
  273. intel_ring_advance(rq, cs);
  274. /* Execute the GPU delay */
  275. err = rq->engine->emit_bb_start(rq,
  276. i915_ggtt_offset(stream->noa_wait), 0,
  277. I915_DISPATCH_SECURE);
  278. if (err) {
  279. i915_request_add(rq);
  280. goto out_rq;
  281. }
  282. /* Read the GPR back, using the pinned global HWSP for convenience */
  283. store = memset32(rq->engine->status_page.addr + 512, 0, 32);
  284. for (i = 0; i < 32; i++) {
  285. u32 cmd;
  286. cs = intel_ring_begin(rq, 4);
  287. if (IS_ERR(cs)) {
  288. err = PTR_ERR(cs);
  289. i915_request_add(rq);
  290. goto out_rq;
  291. }
  292. cmd = MI_STORE_REGISTER_MEM;
  293. if (GRAPHICS_VER(i915) >= 8)
  294. cmd++;
  295. cmd |= MI_USE_GGTT;
  296. *cs++ = cmd;
  297. *cs++ = gpr0 + i * sizeof(u32);
  298. *cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
  299. offset_in_page(store) +
  300. i * sizeof(u32);
  301. *cs++ = 0;
  302. intel_ring_advance(rq, cs);
  303. }
  304. i915_request_add(rq);
  305. if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
  306. pr_err("noa_wait timed out\n");
  307. intel_gt_set_wedged(stream->engine->gt);
  308. err = -EIO;
  309. goto out_rq;
  310. }
  311. /* Verify that the GPR contain our expected values */
  312. for (i = 0; i < 32; i++) {
  313. if (store[i] == STACK_MAGIC)
  314. continue;
  315. pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
  316. i, store[i], STACK_MAGIC);
  317. err = -EINVAL;
  318. }
  319. /* Verify that the user's scratch page was not used for GPR storage */
  320. if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
  321. pr_err("Scratch page overwritten!\n");
  322. igt_hexdump(scratch, 4096);
  323. err = -EINVAL;
  324. }
  325. out_rq:
  326. i915_request_put(rq);
  327. out_ce:
  328. intel_context_put(ce);
  329. out:
  330. stream_destroy(stream);
  331. return err;
  332. }
  333. int i915_perf_live_selftests(struct drm_i915_private *i915)
  334. {
  335. static const struct i915_subtest tests[] = {
  336. SUBTEST(live_sanitycheck),
  337. SUBTEST(live_noa_delay),
  338. SUBTEST(live_noa_gpr),
  339. };
  340. struct i915_perf *perf = &i915->perf;
  341. int err;
  342. if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
  343. return 0;
  344. if (intel_gt_is_wedged(to_gt(i915)))
  345. return 0;
  346. err = alloc_empty_config(&i915->perf);
  347. if (err)
  348. return err;
  349. err = i915_live_subtests(tests, i915);
  350. destroy_empty_config(&i915->perf);
  351. return err;
  352. }