gen2_engine_cs.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. * Copyright © 2020 Intel Corporation
  4. */
  5. #include "gen2_engine_cs.h"
  6. #include "i915_drv.h"
  7. #include "i915_reg.h"
  8. #include "intel_engine.h"
  9. #include "intel_engine_regs.h"
  10. #include "intel_gpu_commands.h"
  11. #include "intel_gt.h"
  12. #include "intel_gt_irq.h"
  13. #include "intel_ring.h"
  14. int gen2_emit_flush(struct i915_request *rq, u32 mode)
  15. {
  16. unsigned int num_store_dw = 12;
  17. u32 cmd, *cs;
  18. cmd = MI_FLUSH;
  19. if (mode & EMIT_INVALIDATE)
  20. cmd |= MI_READ_FLUSH;
  21. cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
  22. if (IS_ERR(cs))
  23. return PTR_ERR(cs);
  24. *cs++ = cmd;
  25. while (num_store_dw--) {
  26. *cs++ = MI_STORE_DWORD_INDEX;
  27. *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
  28. *cs++ = 0;
  29. *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
  30. }
  31. *cs++ = cmd;
  32. intel_ring_advance(rq, cs);
  33. return 0;
  34. }
  35. int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
  36. {
  37. u32 cmd, *cs;
  38. int i;
  39. /*
  40. * read/write caches:
  41. *
  42. * I915_GEM_DOMAIN_RENDER is always invalidated, but is
  43. * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
  44. * also flushed at 2d versus 3d pipeline switches.
  45. *
  46. * read-only caches:
  47. *
  48. * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
  49. * MI_READ_FLUSH is set, and is always flushed on 965.
  50. *
  51. * I915_GEM_DOMAIN_COMMAND may not exist?
  52. *
  53. * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
  54. * invalidated when MI_EXE_FLUSH is set.
  55. *
  56. * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
  57. * invalidated with every MI_FLUSH.
  58. *
  59. * TLBs:
  60. *
  61. * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
  62. * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
  63. * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
  64. * are flushed at any MI_FLUSH.
  65. */
  66. cmd = MI_FLUSH;
  67. if (mode & EMIT_INVALIDATE) {
  68. cmd |= MI_EXE_FLUSH;
  69. if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
  70. cmd |= MI_INVALIDATE_ISP;
  71. }
  72. i = 2;
  73. if (mode & EMIT_INVALIDATE)
  74. i += 20;
  75. cs = intel_ring_begin(rq, i);
  76. if (IS_ERR(cs))
  77. return PTR_ERR(cs);
  78. *cs++ = cmd;
  79. /*
  80. * A random delay to let the CS invalidate take effect? Without this
  81. * delay, the GPU relocation path fails as the CS does not see
  82. * the updated contents. Just as important, if we apply the flushes
  83. * to the EMIT_FLUSH branch (i.e. immediately after the relocation
  84. * write and before the invalidate on the next batch), the relocations
  85. * still fail. This implies that is a delay following invalidation
  86. * that is required to reset the caches as opposed to a delay to
  87. * ensure the memory is written.
  88. */
  89. if (mode & EMIT_INVALIDATE) {
  90. *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
  91. *cs++ = intel_gt_scratch_offset(rq->engine->gt,
  92. INTEL_GT_SCRATCH_FIELD_DEFAULT) |
  93. PIPE_CONTROL_GLOBAL_GTT;
  94. *cs++ = 0;
  95. *cs++ = 0;
  96. for (i = 0; i < 12; i++)
  97. *cs++ = MI_FLUSH;
  98. *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
  99. *cs++ = intel_gt_scratch_offset(rq->engine->gt,
  100. INTEL_GT_SCRATCH_FIELD_DEFAULT) |
  101. PIPE_CONTROL_GLOBAL_GTT;
  102. *cs++ = 0;
  103. *cs++ = 0;
  104. }
  105. *cs++ = cmd;
  106. intel_ring_advance(rq, cs);
  107. return 0;
  108. }
  109. int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
  110. {
  111. u32 *cs;
  112. cs = intel_ring_begin(rq, 2);
  113. if (IS_ERR(cs))
  114. return PTR_ERR(cs);
  115. *cs++ = MI_FLUSH;
  116. *cs++ = MI_NOOP;
  117. intel_ring_advance(rq, cs);
  118. return 0;
  119. }
  120. static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
  121. int flush, int post)
  122. {
  123. GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
  124. GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);
  125. *cs++ = MI_FLUSH;
  126. while (flush--) {
  127. *cs++ = MI_STORE_DWORD_INDEX;
  128. *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
  129. *cs++ = rq->fence.seqno;
  130. }
  131. while (post--) {
  132. *cs++ = MI_STORE_DWORD_INDEX;
  133. *cs++ = I915_GEM_HWS_SEQNO_ADDR;
  134. *cs++ = rq->fence.seqno;
  135. }
  136. *cs++ = MI_USER_INTERRUPT;
  137. rq->tail = intel_ring_offset(rq, cs);
  138. assert_ring_tail_valid(rq->ring, rq->tail);
  139. return cs;
  140. }
  141. u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
  142. {
  143. return __gen2_emit_breadcrumb(rq, cs, 16, 8);
  144. }
  145. u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
  146. {
  147. return __gen2_emit_breadcrumb(rq, cs, 8, 8);
  148. }
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
/*
 * Shifted into the upper 16 bits of the size dword of the TLB-eviction
 * blit in i830_emit_bb_start().
 */
#define I830_TLB_ENTRIES (2)
/*
 * Larger of the TLB-entry pages and the batch limit; i830_emit_bb_start()
 * asserts that the GT scratch is at least this big.
 */
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
  153. int i830_emit_bb_start(struct i915_request *rq,
  154. u64 offset, u32 len,
  155. unsigned int dispatch_flags)
  156. {
  157. u32 *cs, cs_offset =
  158. intel_gt_scratch_offset(rq->engine->gt,
  159. INTEL_GT_SCRATCH_FIELD_DEFAULT);
  160. GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
  161. cs = intel_ring_begin(rq, 6);
  162. if (IS_ERR(cs))
  163. return PTR_ERR(cs);
  164. /* Evict the invalid PTE TLBs */
  165. *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
  166. *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
  167. *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
  168. *cs++ = cs_offset;
  169. *cs++ = 0xdeadbeef;
  170. *cs++ = MI_NOOP;
  171. intel_ring_advance(rq, cs);
  172. if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
  173. if (len > I830_BATCH_LIMIT)
  174. return -ENOSPC;
  175. cs = intel_ring_begin(rq, 6 + 2);
  176. if (IS_ERR(cs))
  177. return PTR_ERR(cs);
  178. /*
  179. * Blit the batch (which has now all relocs applied) to the
  180. * stable batch scratch bo area (so that the CS never
  181. * stumbles over its tlb invalidation bug) ...
  182. */
  183. *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
  184. *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
  185. *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
  186. *cs++ = cs_offset;
  187. *cs++ = 4096;
  188. *cs++ = offset;
  189. *cs++ = MI_FLUSH;
  190. *cs++ = MI_NOOP;
  191. intel_ring_advance(rq, cs);
  192. /* ... and execute it. */
  193. offset = cs_offset;
  194. }
  195. if (!(dispatch_flags & I915_DISPATCH_SECURE))
  196. offset |= MI_BATCH_NON_SECURE;
  197. cs = intel_ring_begin(rq, 2);
  198. if (IS_ERR(cs))
  199. return PTR_ERR(cs);
  200. *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
  201. *cs++ = offset;
  202. intel_ring_advance(rq, cs);
  203. return 0;
  204. }
  205. int gen2_emit_bb_start(struct i915_request *rq,
  206. u64 offset, u32 len,
  207. unsigned int dispatch_flags)
  208. {
  209. u32 *cs;
  210. if (!(dispatch_flags & I915_DISPATCH_SECURE))
  211. offset |= MI_BATCH_NON_SECURE;
  212. cs = intel_ring_begin(rq, 2);
  213. if (IS_ERR(cs))
  214. return PTR_ERR(cs);
  215. *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
  216. *cs++ = offset;
  217. intel_ring_advance(rq, cs);
  218. return 0;
  219. }
  220. int gen4_emit_bb_start(struct i915_request *rq,
  221. u64 offset, u32 length,
  222. unsigned int dispatch_flags)
  223. {
  224. u32 security;
  225. u32 *cs;
  226. security = MI_BATCH_NON_SECURE_I965;
  227. if (dispatch_flags & I915_DISPATCH_SECURE)
  228. security = 0;
  229. cs = intel_ring_begin(rq, 2);
  230. if (IS_ERR(cs))
  231. return PTR_ERR(cs);
  232. *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
  233. *cs++ = offset;
  234. intel_ring_advance(rq, cs);
  235. return 0;
  236. }
  237. void gen2_irq_enable(struct intel_engine_cs *engine)
  238. {
  239. engine->i915->gen2_imr_mask &= ~engine->irq_enable_mask;
  240. intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
  241. intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
  242. }
  243. void gen2_irq_disable(struct intel_engine_cs *engine)
  244. {
  245. engine->i915->gen2_imr_mask |= engine->irq_enable_mask;
  246. intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
  247. }
  248. void gen5_irq_enable(struct intel_engine_cs *engine)
  249. {
  250. gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
  251. }
  252. void gen5_irq_disable(struct intel_engine_cs *engine)
  253. {
  254. gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
  255. }