selftest_reset.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. * Copyright © 2018 Intel Corporation
  4. */
  5. #include <linux/crc32.h>
  6. #include "gem/i915_gem_stolen.h"
  7. #include "i915_memcpy.h"
  8. #include "i915_selftest.h"
  9. #include "intel_gpu_commands.h"
  10. #include "selftests/igt_reset.h"
  11. #include "selftests/igt_atomic.h"
  12. #include "selftests/igt_spinner.h"
  13. static int
  14. __igt_reset_stolen(struct intel_gt *gt,
  15. intel_engine_mask_t mask,
  16. const char *msg)
  17. {
  18. struct i915_ggtt *ggtt = gt->ggtt;
  19. const struct resource *dsm = &gt->i915->dsm.stolen;
  20. resource_size_t num_pages, page;
  21. struct intel_engine_cs *engine;
  22. intel_wakeref_t wakeref;
  23. enum intel_engine_id id;
  24. struct igt_spinner spin;
  25. long max, count;
  26. void *tmp;
  27. u32 *crc;
  28. int err;
  29. if (!drm_mm_node_allocated(&ggtt->error_capture))
  30. return 0;
  31. num_pages = resource_size(dsm) >> PAGE_SHIFT;
  32. if (!num_pages)
  33. return 0;
  34. crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
  35. if (!crc)
  36. return -ENOMEM;
  37. tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
  38. if (!tmp) {
  39. err = -ENOMEM;
  40. goto err_crc;
  41. }
  42. igt_global_reset_lock(gt);
  43. wakeref = intel_runtime_pm_get(gt->uncore->rpm);
  44. err = igt_spinner_init(&spin, gt);
  45. if (err)
  46. goto err_lock;
  47. for_each_engine(engine, gt, id) {
  48. struct intel_context *ce;
  49. struct i915_request *rq;
  50. if (!(mask & engine->mask))
  51. continue;
  52. if (!intel_engine_can_store_dword(engine))
  53. continue;
  54. ce = intel_context_create(engine);
  55. if (IS_ERR(ce)) {
  56. err = PTR_ERR(ce);
  57. goto err_spin;
  58. }
  59. rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
  60. intel_context_put(ce);
  61. if (IS_ERR(rq)) {
  62. err = PTR_ERR(rq);
  63. goto err_spin;
  64. }
  65. i915_request_add(rq);
  66. }
  67. for (page = 0; page < num_pages; page++) {
  68. dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
  69. void __iomem *s;
  70. void *in;
  71. ggtt->vm.insert_page(&ggtt->vm, dma,
  72. ggtt->error_capture.start,
  73. i915_gem_get_pat_index(gt->i915,
  74. I915_CACHE_NONE),
  75. 0);
  76. mb();
  77. s = io_mapping_map_wc(&ggtt->iomap,
  78. ggtt->error_capture.start,
  79. PAGE_SIZE);
  80. if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
  81. page << PAGE_SHIFT,
  82. ((page + 1) << PAGE_SHIFT) - 1))
  83. memset_io(s, STACK_MAGIC, PAGE_SIZE);
  84. in = (void __force *)s;
  85. if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
  86. in = tmp;
  87. crc[page] = crc32_le(0, in, PAGE_SIZE);
  88. io_mapping_unmap(s);
  89. }
  90. mb();
  91. ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
  92. if (mask == ALL_ENGINES) {
  93. intel_gt_reset(gt, mask, NULL);
  94. } else {
  95. for_each_engine(engine, gt, id) {
  96. if (mask & engine->mask)
  97. intel_engine_reset(engine, NULL);
  98. }
  99. }
  100. max = -1;
  101. count = 0;
  102. for (page = 0; page < num_pages; page++) {
  103. dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
  104. void __iomem *s;
  105. void *in;
  106. u32 x;
  107. ggtt->vm.insert_page(&ggtt->vm, dma,
  108. ggtt->error_capture.start,
  109. i915_gem_get_pat_index(gt->i915,
  110. I915_CACHE_NONE),
  111. 0);
  112. mb();
  113. s = io_mapping_map_wc(&ggtt->iomap,
  114. ggtt->error_capture.start,
  115. PAGE_SIZE);
  116. in = (void __force *)s;
  117. if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
  118. in = tmp;
  119. x = crc32_le(0, in, PAGE_SIZE);
  120. if (x != crc[page] &&
  121. !__drm_mm_interval_first(&gt->i915->mm.stolen,
  122. page << PAGE_SHIFT,
  123. ((page + 1) << PAGE_SHIFT) - 1)) {
  124. pr_debug("unused stolen page %pa modified by GPU reset\n",
  125. &page);
  126. if (count++ == 0)
  127. igt_hexdump(in, PAGE_SIZE);
  128. max = page;
  129. }
  130. io_mapping_unmap(s);
  131. }
  132. mb();
  133. ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
  134. if (count > 0) {
  135. pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
  136. msg, count, max);
  137. }
  138. if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
  139. pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
  140. msg, I915_GEM_STOLEN_BIAS);
  141. err = -EINVAL;
  142. }
  143. err_spin:
  144. igt_spinner_fini(&spin);
  145. err_lock:
  146. intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  147. igt_global_reset_unlock(gt);
  148. kfree(tmp);
  149. err_crc:
  150. kfree(crc);
  151. return err;
  152. }
  153. static int igt_reset_device_stolen(void *arg)
  154. {
  155. return __igt_reset_stolen(arg, ALL_ENGINES, "device");
  156. }
  157. static int igt_reset_engines_stolen(void *arg)
  158. {
  159. struct intel_gt *gt = arg;
  160. struct intel_engine_cs *engine;
  161. enum intel_engine_id id;
  162. int err;
  163. if (!intel_has_reset_engine(gt))
  164. return 0;
  165. for_each_engine(engine, gt, id) {
  166. err = __igt_reset_stolen(gt, engine->mask, engine->name);
  167. if (err)
  168. return err;
  169. }
  170. return 0;
  171. }
  172. static int igt_global_reset(void *arg)
  173. {
  174. struct intel_gt *gt = arg;
  175. unsigned int reset_count;
  176. intel_wakeref_t wakeref;
  177. int err = 0;
  178. /* Check that we can issue a global GPU reset */
  179. igt_global_reset_lock(gt);
  180. wakeref = intel_runtime_pm_get(gt->uncore->rpm);
  181. reset_count = i915_reset_count(&gt->i915->gpu_error);
  182. intel_gt_reset(gt, ALL_ENGINES, NULL);
  183. if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
  184. pr_err("No GPU reset recorded!\n");
  185. err = -EINVAL;
  186. }
  187. intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  188. igt_global_reset_unlock(gt);
  189. if (intel_gt_is_wedged(gt))
  190. err = -EIO;
  191. return err;
  192. }
  193. static int igt_wedged_reset(void *arg)
  194. {
  195. struct intel_gt *gt = arg;
  196. intel_wakeref_t wakeref;
  197. /* Check that we can recover a wedged device with a GPU reset */
  198. igt_global_reset_lock(gt);
  199. wakeref = intel_runtime_pm_get(gt->uncore->rpm);
  200. intel_gt_set_wedged(gt);
  201. GEM_BUG_ON(!intel_gt_is_wedged(gt));
  202. intel_gt_reset(gt, ALL_ENGINES, NULL);
  203. intel_runtime_pm_put(gt->uncore->rpm, wakeref);
  204. igt_global_reset_unlock(gt);
  205. return intel_gt_is_wedged(gt) ? -EIO : 0;
  206. }
  207. static int igt_atomic_reset(void *arg)
  208. {
  209. struct intel_gt *gt = arg;
  210. const typeof(*igt_atomic_phases) *p;
  211. intel_wakeref_t wakeref;
  212. int err = 0;
  213. /* Check that the resets are usable from atomic context */
  214. wakeref = intel_gt_pm_get(gt);
  215. igt_global_reset_lock(gt);
  216. /* Flush any requests before we get started and check basics */
  217. if (!igt_force_reset(gt))
  218. goto unlock;
  219. for (p = igt_atomic_phases; p->name; p++) {
  220. intel_engine_mask_t awake;
  221. GEM_TRACE("__intel_gt_reset under %s\n", p->name);
  222. awake = reset_prepare(gt);
  223. p->critical_section_begin();
  224. err = intel_gt_reset_all_engines(gt);
  225. p->critical_section_end();
  226. reset_finish(gt, awake);
  227. if (err) {
  228. pr_err("__intel_gt_reset failed under %s\n", p->name);
  229. break;
  230. }
  231. }
  232. /* As we poke around the guts, do a full reset before continuing. */
  233. igt_force_reset(gt);
  234. unlock:
  235. igt_global_reset_unlock(gt);
  236. intel_gt_pm_put(gt, wakeref);
  237. return err;
  238. }
  239. static int igt_atomic_engine_reset(void *arg)
  240. {
  241. struct intel_gt *gt = arg;
  242. const typeof(*igt_atomic_phases) *p;
  243. struct intel_engine_cs *engine;
  244. enum intel_engine_id id;
  245. intel_wakeref_t wakeref;
  246. int err = 0;
  247. /* Check that the resets are usable from atomic context */
  248. if (!intel_has_reset_engine(gt))
  249. return 0;
  250. if (intel_uc_uses_guc_submission(&gt->uc))
  251. return 0;
  252. wakeref = intel_gt_pm_get(gt);
  253. igt_global_reset_lock(gt);
  254. /* Flush any requests before we get started and check basics */
  255. if (!igt_force_reset(gt))
  256. goto out_unlock;
  257. for_each_engine(engine, gt, id) {
  258. struct tasklet_struct *t = &engine->sched_engine->tasklet;
  259. if (t->func)
  260. tasklet_disable(t);
  261. intel_engine_pm_get(engine);
  262. for (p = igt_atomic_phases; p->name; p++) {
  263. GEM_TRACE("intel_engine_reset(%s) under %s\n",
  264. engine->name, p->name);
  265. if (strcmp(p->name, "softirq"))
  266. local_bh_disable();
  267. p->critical_section_begin();
  268. err = __intel_engine_reset_bh(engine, NULL);
  269. p->critical_section_end();
  270. if (strcmp(p->name, "softirq"))
  271. local_bh_enable();
  272. if (err) {
  273. pr_err("intel_engine_reset(%s) failed under %s\n",
  274. engine->name, p->name);
  275. break;
  276. }
  277. }
  278. intel_engine_pm_put(engine);
  279. if (t->func) {
  280. tasklet_enable(t);
  281. tasklet_hi_schedule(t);
  282. }
  283. if (err)
  284. break;
  285. }
  286. /* As we poke around the guts, do a full reset before continuing. */
  287. igt_force_reset(gt);
  288. out_unlock:
  289. igt_global_reset_unlock(gt);
  290. intel_gt_pm_put(gt, wakeref);
  291. return err;
  292. }
  293. int intel_reset_live_selftests(struct drm_i915_private *i915)
  294. {
  295. static const struct i915_subtest tests[] = {
  296. SUBTEST(igt_global_reset), /* attempt to recover GPU first */
  297. SUBTEST(igt_reset_device_stolen),
  298. SUBTEST(igt_reset_engines_stolen),
  299. SUBTEST(igt_wedged_reset),
  300. SUBTEST(igt_atomic_reset),
  301. SUBTEST(igt_atomic_engine_reset),
  302. };
  303. struct intel_gt *gt = to_gt(i915);
  304. if (!intel_has_gpu_reset(gt))
  305. return 0;
  306. if (intel_gt_is_wedged(gt))
  307. return -EIO; /* we're long past hope of a successful reset */
  308. return intel_gt_live_subtests(tests, gt);
  309. }