intel_engine_cs.c 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. * Copyright © 2016 Intel Corporation
  4. */
  5. #include <linux/string_helpers.h>
  6. #include <drm/drm_print.h>
  7. #include "gem/i915_gem_context.h"
  8. #include "gem/i915_gem_internal.h"
  9. #include "gt/intel_gt_print.h"
  10. #include "gt/intel_gt_regs.h"
  11. #include "i915_cmd_parser.h"
  12. #include "i915_drv.h"
  13. #include "i915_irq.h"
  14. #include "i915_reg.h"
  15. #include "intel_breadcrumbs.h"
  16. #include "intel_context.h"
  17. #include "intel_engine.h"
  18. #include "intel_engine_pm.h"
  19. #include "intel_engine_regs.h"
  20. #include "intel_engine_user.h"
  21. #include "intel_execlists_submission.h"
  22. #include "intel_gt.h"
  23. #include "intel_gt_mcr.h"
  24. #include "intel_gt_pm.h"
  25. #include "intel_gt_requests.h"
  26. #include "intel_lrc.h"
  27. #include "intel_lrc_reg.h"
  28. #include "intel_reset.h"
  29. #include "intel_ring.h"
  30. #include "uc/intel_guc_submission.h"
  31. /* Haswell does have the CXT_SIZE register however it does not appear to be
  32. * valid. Now, docs explain in dwords what is in the context object. The full
  33. * size is 70720 bytes, however, the power context and execlist context will
  34. * never be saved (power context is stored elsewhere, and execlists don't work
  35. * on HSW) - so the final size, including the extra state required for the
  36. * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  37. */
  38. #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
  39. #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
  40. #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
  41. #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
  42. #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
  43. #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
  44. #define MAX_MMIO_BASES 3
  45. struct engine_info {
  46. u8 class;
  47. u8 instance;
  48. /* mmio bases table *must* be sorted in reverse graphics_ver order */
  49. struct engine_mmio_base {
  50. u32 graphics_ver : 8;
  51. u32 base : 24;
  52. } mmio_bases[MAX_MMIO_BASES];
  53. };
  54. static const struct engine_info intel_engines[] = {
  55. [RCS0] = {
  56. .class = RENDER_CLASS,
  57. .instance = 0,
  58. .mmio_bases = {
  59. { .graphics_ver = 1, .base = RENDER_RING_BASE }
  60. },
  61. },
  62. [BCS0] = {
  63. .class = COPY_ENGINE_CLASS,
  64. .instance = 0,
  65. .mmio_bases = {
  66. { .graphics_ver = 6, .base = BLT_RING_BASE }
  67. },
  68. },
  69. [BCS1] = {
  70. .class = COPY_ENGINE_CLASS,
  71. .instance = 1,
  72. .mmio_bases = {
  73. { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
  74. },
  75. },
  76. [BCS2] = {
  77. .class = COPY_ENGINE_CLASS,
  78. .instance = 2,
  79. .mmio_bases = {
  80. { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
  81. },
  82. },
  83. [BCS3] = {
  84. .class = COPY_ENGINE_CLASS,
  85. .instance = 3,
  86. .mmio_bases = {
  87. { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
  88. },
  89. },
  90. [BCS4] = {
  91. .class = COPY_ENGINE_CLASS,
  92. .instance = 4,
  93. .mmio_bases = {
  94. { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
  95. },
  96. },
  97. [BCS5] = {
  98. .class = COPY_ENGINE_CLASS,
  99. .instance = 5,
  100. .mmio_bases = {
  101. { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
  102. },
  103. },
  104. [BCS6] = {
  105. .class = COPY_ENGINE_CLASS,
  106. .instance = 6,
  107. .mmio_bases = {
  108. { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
  109. },
  110. },
  111. [BCS7] = {
  112. .class = COPY_ENGINE_CLASS,
  113. .instance = 7,
  114. .mmio_bases = {
  115. { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
  116. },
  117. },
  118. [BCS8] = {
  119. .class = COPY_ENGINE_CLASS,
  120. .instance = 8,
  121. .mmio_bases = {
  122. { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
  123. },
  124. },
  125. [VCS0] = {
  126. .class = VIDEO_DECODE_CLASS,
  127. .instance = 0,
  128. .mmio_bases = {
  129. { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
  130. { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
  131. { .graphics_ver = 4, .base = BSD_RING_BASE }
  132. },
  133. },
  134. [VCS1] = {
  135. .class = VIDEO_DECODE_CLASS,
  136. .instance = 1,
  137. .mmio_bases = {
  138. { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
  139. { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
  140. },
  141. },
  142. [VCS2] = {
  143. .class = VIDEO_DECODE_CLASS,
  144. .instance = 2,
  145. .mmio_bases = {
  146. { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
  147. },
  148. },
  149. [VCS3] = {
  150. .class = VIDEO_DECODE_CLASS,
  151. .instance = 3,
  152. .mmio_bases = {
  153. { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
  154. },
  155. },
  156. [VCS4] = {
  157. .class = VIDEO_DECODE_CLASS,
  158. .instance = 4,
  159. .mmio_bases = {
  160. { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
  161. },
  162. },
  163. [VCS5] = {
  164. .class = VIDEO_DECODE_CLASS,
  165. .instance = 5,
  166. .mmio_bases = {
  167. { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
  168. },
  169. },
  170. [VCS6] = {
  171. .class = VIDEO_DECODE_CLASS,
  172. .instance = 6,
  173. .mmio_bases = {
  174. { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
  175. },
  176. },
  177. [VCS7] = {
  178. .class = VIDEO_DECODE_CLASS,
  179. .instance = 7,
  180. .mmio_bases = {
  181. { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
  182. },
  183. },
  184. [VECS0] = {
  185. .class = VIDEO_ENHANCEMENT_CLASS,
  186. .instance = 0,
  187. .mmio_bases = {
  188. { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
  189. { .graphics_ver = 7, .base = VEBOX_RING_BASE }
  190. },
  191. },
  192. [VECS1] = {
  193. .class = VIDEO_ENHANCEMENT_CLASS,
  194. .instance = 1,
  195. .mmio_bases = {
  196. { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
  197. },
  198. },
  199. [VECS2] = {
  200. .class = VIDEO_ENHANCEMENT_CLASS,
  201. .instance = 2,
  202. .mmio_bases = {
  203. { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
  204. },
  205. },
  206. [VECS3] = {
  207. .class = VIDEO_ENHANCEMENT_CLASS,
  208. .instance = 3,
  209. .mmio_bases = {
  210. { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
  211. },
  212. },
  213. [CCS0] = {
  214. .class = COMPUTE_CLASS,
  215. .instance = 0,
  216. .mmio_bases = {
  217. { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
  218. }
  219. },
  220. [CCS1] = {
  221. .class = COMPUTE_CLASS,
  222. .instance = 1,
  223. .mmio_bases = {
  224. { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
  225. }
  226. },
  227. [CCS2] = {
  228. .class = COMPUTE_CLASS,
  229. .instance = 2,
  230. .mmio_bases = {
  231. { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
  232. }
  233. },
  234. [CCS3] = {
  235. .class = COMPUTE_CLASS,
  236. .instance = 3,
  237. .mmio_bases = {
  238. { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
  239. }
  240. },
  241. [GSC0] = {
  242. .class = OTHER_CLASS,
  243. .instance = OTHER_GSC_INSTANCE,
  244. .mmio_bases = {
  245. { .graphics_ver = 12, .base = MTL_GSC_RING_BASE }
  246. }
  247. },
  248. };
  249. /**
  250. * intel_engine_context_size() - return the size of the context for an engine
  251. * @gt: the gt
  252. * @class: engine class
  253. *
  254. * Each engine class may require a different amount of space for a context
  255. * image.
  256. *
  257. * Return: size (in bytes) of an engine class specific context image
  258. *
  259. * Note: this size includes the HWSP, which is part of the context image
  260. * in LRC mode, but does not include the "shared data page" used with
  261. * GuC submission. The caller should account for this if using the GuC.
  262. */
  263. u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
  264. {
  265. struct intel_uncore *uncore = gt->uncore;
  266. u32 cxt_size;
  267. BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
  268. switch (class) {
  269. case COMPUTE_CLASS:
  270. fallthrough;
  271. case RENDER_CLASS:
  272. switch (GRAPHICS_VER(gt->i915)) {
  273. default:
  274. MISSING_CASE(GRAPHICS_VER(gt->i915));
  275. return DEFAULT_LR_CONTEXT_RENDER_SIZE;
  276. case 12:
  277. case 11:
  278. return GEN11_LR_CONTEXT_RENDER_SIZE;
  279. case 9:
  280. return GEN9_LR_CONTEXT_RENDER_SIZE;
  281. case 8:
  282. return GEN8_LR_CONTEXT_RENDER_SIZE;
  283. case 7:
  284. if (IS_HASWELL(gt->i915))
  285. return HSW_CXT_TOTAL_SIZE;
  286. cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
  287. return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
  288. PAGE_SIZE);
  289. case 6:
  290. cxt_size = intel_uncore_read(uncore, CXT_SIZE);
  291. return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
  292. PAGE_SIZE);
  293. case 5:
  294. case 4:
  295. /*
  296. * There is a discrepancy here between the size reported
  297. * by the register and the size of the context layout
  298. * in the docs. Both are described as authoritative!
  299. *
  300. * The discrepancy is on the order of a few cachelines,
  301. * but the total is under one page (4k), which is our
  302. * minimum allocation anyway so it should all come
  303. * out in the wash.
  304. */
  305. cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
  306. gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
  307. GRAPHICS_VER(gt->i915), cxt_size * 64,
  308. cxt_size - 1);
  309. return round_up(cxt_size * 64, PAGE_SIZE);
  310. case 3:
  311. case 2:
  312. /* For the special day when i810 gets merged. */
  313. case 1:
  314. return 0;
  315. }
  316. break;
  317. default:
  318. MISSING_CASE(class);
  319. fallthrough;
  320. case VIDEO_DECODE_CLASS:
  321. case VIDEO_ENHANCEMENT_CLASS:
  322. case COPY_ENGINE_CLASS:
  323. case OTHER_CLASS:
  324. if (GRAPHICS_VER(gt->i915) < 8)
  325. return 0;
  326. return GEN8_LR_CONTEXT_OTHER_SIZE;
  327. }
  328. }
  329. static u32 __engine_mmio_base(struct drm_i915_private *i915,
  330. const struct engine_mmio_base *bases)
  331. {
  332. int i;
  333. for (i = 0; i < MAX_MMIO_BASES; i++)
  334. if (GRAPHICS_VER(i915) >= bases[i].graphics_ver)
  335. break;
  336. GEM_BUG_ON(i == MAX_MMIO_BASES);
  337. GEM_BUG_ON(!bases[i].base);
  338. return bases[i].base;
  339. }
  340. static void __sprint_engine_name(struct intel_engine_cs *engine)
  341. {
  342. /*
  343. * Before we know what the uABI name for this engine will be,
  344. * we still would like to keep track of this engine in the debug logs.
  345. * We throw in a ' here as a reminder that this isn't its final name.
  346. */
  347. GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
  348. intel_engine_class_repr(engine->class),
  349. engine->instance) >= sizeof(engine->name));
  350. }
  351. void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
  352. {
  353. /*
  354. * Though they added more rings on g4x/ilk, they did not add
  355. * per-engine HWSTAM until gen6.
  356. */
  357. if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS)
  358. return;
  359. if (GRAPHICS_VER(engine->i915) >= 3)
  360. ENGINE_WRITE(engine, RING_HWSTAM, mask);
  361. else
  362. ENGINE_WRITE16(engine, RING_HWSTAM, mask);
  363. }
  364. static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
  365. {
  366. /* Mask off all writes into the unknown HWSP */
  367. intel_engine_set_hwsp_writemask(engine, ~0u);
  368. }
  369. static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
  370. {
  371. GEM_DEBUG_WARN_ON(iir);
  372. }
  373. static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
  374. {
  375. u32 reset_domain;
  376. if (ver >= 11) {
  377. static const u32 engine_reset_domains[] = {
  378. [RCS0] = GEN11_GRDOM_RENDER,
  379. [BCS0] = GEN11_GRDOM_BLT,
  380. [BCS1] = XEHPC_GRDOM_BLT1,
  381. [BCS2] = XEHPC_GRDOM_BLT2,
  382. [BCS3] = XEHPC_GRDOM_BLT3,
  383. [BCS4] = XEHPC_GRDOM_BLT4,
  384. [BCS5] = XEHPC_GRDOM_BLT5,
  385. [BCS6] = XEHPC_GRDOM_BLT6,
  386. [BCS7] = XEHPC_GRDOM_BLT7,
  387. [BCS8] = XEHPC_GRDOM_BLT8,
  388. [VCS0] = GEN11_GRDOM_MEDIA,
  389. [VCS1] = GEN11_GRDOM_MEDIA2,
  390. [VCS2] = GEN11_GRDOM_MEDIA3,
  391. [VCS3] = GEN11_GRDOM_MEDIA4,
  392. [VCS4] = GEN11_GRDOM_MEDIA5,
  393. [VCS5] = GEN11_GRDOM_MEDIA6,
  394. [VCS6] = GEN11_GRDOM_MEDIA7,
  395. [VCS7] = GEN11_GRDOM_MEDIA8,
  396. [VECS0] = GEN11_GRDOM_VECS,
  397. [VECS1] = GEN11_GRDOM_VECS2,
  398. [VECS2] = GEN11_GRDOM_VECS3,
  399. [VECS3] = GEN11_GRDOM_VECS4,
  400. [CCS0] = GEN11_GRDOM_RENDER,
  401. [CCS1] = GEN11_GRDOM_RENDER,
  402. [CCS2] = GEN11_GRDOM_RENDER,
  403. [CCS3] = GEN11_GRDOM_RENDER,
  404. [GSC0] = GEN12_GRDOM_GSC,
  405. };
  406. GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
  407. !engine_reset_domains[id]);
  408. reset_domain = engine_reset_domains[id];
  409. } else {
  410. static const u32 engine_reset_domains[] = {
  411. [RCS0] = GEN6_GRDOM_RENDER,
  412. [BCS0] = GEN6_GRDOM_BLT,
  413. [VCS0] = GEN6_GRDOM_MEDIA,
  414. [VCS1] = GEN8_GRDOM_MEDIA2,
  415. [VECS0] = GEN6_GRDOM_VECS,
  416. };
  417. GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
  418. !engine_reset_domains[id]);
  419. reset_domain = engine_reset_domains[id];
  420. }
  421. return reset_domain;
  422. }
  423. static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
  424. u8 logical_instance)
  425. {
  426. const struct engine_info *info = &intel_engines[id];
  427. struct drm_i915_private *i915 = gt->i915;
  428. struct intel_engine_cs *engine;
  429. u8 guc_class;
  430. BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
  431. BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
  432. BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1));
  433. BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1));
  434. if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
  435. return -EINVAL;
  436. if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
  437. return -EINVAL;
  438. if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
  439. return -EINVAL;
  440. if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
  441. return -EINVAL;
  442. engine = kzalloc_obj(*engine);
  443. if (!engine)
  444. return -ENOMEM;
  445. BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
  446. INIT_LIST_HEAD(&engine->pinned_contexts_list);
  447. engine->id = id;
  448. engine->legacy_idx = INVALID_ENGINE;
  449. engine->mask = BIT(id);
  450. engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
  451. id);
  452. engine->i915 = i915;
  453. engine->gt = gt;
  454. engine->uncore = gt->uncore;
  455. guc_class = engine_class_to_guc_class(info->class);
  456. engine->guc_id = MAKE_GUC_ID(guc_class, info->instance);
  457. engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
  458. engine->irq_handler = nop_irq_handler;
  459. engine->class = info->class;
  460. engine->instance = info->instance;
  461. engine->logical_mask = BIT(logical_instance);
  462. __sprint_engine_name(engine);
  463. if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
  464. __ffs(CCS_MASK(engine->gt) | RCS_MASK(engine->gt)) == engine->instance)
  465. engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
  466. /* features common between engines sharing EUs */
  467. if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
  468. engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
  469. engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
  470. }
  471. engine->props.heartbeat_interval_ms =
  472. CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
  473. engine->props.max_busywait_duration_ns =
  474. CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
  475. engine->props.preempt_timeout_ms =
  476. CONFIG_DRM_I915_PREEMPT_TIMEOUT;
  477. engine->props.stop_timeout_ms =
  478. CONFIG_DRM_I915_STOP_TIMEOUT;
  479. engine->props.timeslice_duration_ms =
  480. CONFIG_DRM_I915_TIMESLICE_DURATION;
  481. /*
  482. * Mid-thread pre-emption is not available in Gen12. Unfortunately,
  483. * some compute workloads run quite long threads. That means they get
  484. * reset due to not pre-empting in a timely manner. So, bump the
  485. * pre-emption timeout value to be much higher for compute engines.
  486. */
  487. if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
  488. engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
  489. /* Cap properties according to any system limits */
  490. #define CLAMP_PROP(field) \
  491. do { \
  492. u64 clamp = intel_clamp_##field(engine, engine->props.field); \
  493. if (clamp != engine->props.field) { \
  494. drm_notice(&engine->i915->drm, \
  495. "Warning, clamping %s to %lld to prevent overflow\n", \
  496. #field, clamp); \
  497. engine->props.field = clamp; \
  498. } \
  499. } while (0)
  500. CLAMP_PROP(heartbeat_interval_ms);
  501. CLAMP_PROP(max_busywait_duration_ns);
  502. CLAMP_PROP(preempt_timeout_ms);
  503. CLAMP_PROP(stop_timeout_ms);
  504. CLAMP_PROP(timeslice_duration_ms);
  505. #undef CLAMP_PROP
  506. engine->defaults = engine->props; /* never to change again */
  507. engine->context_size = intel_engine_context_size(gt, engine->class);
  508. if (WARN_ON(engine->context_size > BIT(20)))
  509. engine->context_size = 0;
  510. if (engine->context_size)
  511. DRIVER_CAPS(i915)->has_logical_contexts = true;
  512. ewma__engine_latency_init(&engine->latency);
  513. ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
  514. /* Scrub mmio state on takeover */
  515. intel_engine_sanitize_mmio(engine);
  516. gt->engine_class[info->class][info->instance] = engine;
  517. gt->engine[id] = engine;
  518. return 0;
  519. }
  520. u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
  521. {
  522. value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
  523. return value;
  524. }
  525. u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
  526. {
  527. value = min(value, jiffies_to_nsecs(2));
  528. return value;
  529. }
  530. u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
  531. {
  532. /*
  533. * NB: The GuC API only supports 32bit values. However, the limit is further
  534. * reduced due to internal calculations which would otherwise overflow.
  535. */
  536. if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
  537. value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
  538. value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
  539. return value;
  540. }
  541. u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
  542. {
  543. value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
  544. return value;
  545. }
  546. u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
  547. {
  548. /*
  549. * NB: The GuC API only supports 32bit values. However, the limit is further
  550. * reduced due to internal calculations which would otherwise overflow.
  551. */
  552. if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
  553. value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
  554. value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
  555. return value;
  556. }
  557. static void __setup_engine_capabilities(struct intel_engine_cs *engine)
  558. {
  559. struct drm_i915_private *i915 = engine->i915;
  560. if (engine->class == VIDEO_DECODE_CLASS) {
  561. /*
  562. * HEVC support is present on first engine instance
  563. * before Gen11 and on all instances afterwards.
  564. */
  565. if (GRAPHICS_VER(i915) >= 11 ||
  566. (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
  567. engine->uabi_capabilities |=
  568. I915_VIDEO_CLASS_CAPABILITY_HEVC;
  569. /*
  570. * SFC block is present only on even logical engine
  571. * instances.
  572. */
  573. if ((GRAPHICS_VER(i915) >= 11 &&
  574. (engine->gt->info.vdbox_sfc_access &
  575. BIT(engine->instance))) ||
  576. (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
  577. engine->uabi_capabilities |=
  578. I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
  579. } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
  580. if (GRAPHICS_VER(i915) >= 9 &&
  581. engine->gt->info.sfc_mask & BIT(engine->instance))
  582. engine->uabi_capabilities |=
  583. I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
  584. }
  585. }
  586. static void intel_setup_engine_capabilities(struct intel_gt *gt)
  587. {
  588. struct intel_engine_cs *engine;
  589. enum intel_engine_id id;
  590. for_each_engine(engine, gt, id)
  591. __setup_engine_capabilities(engine);
  592. }
  593. /**
  594. * intel_engines_release() - free the resources allocated for Command Streamers
  595. * @gt: pointer to struct intel_gt
  596. */
  597. void intel_engines_release(struct intel_gt *gt)
  598. {
  599. struct intel_engine_cs *engine;
  600. enum intel_engine_id id;
  601. /*
  602. * Before we release the resources held by engine, we must be certain
  603. * that the HW is no longer accessing them -- having the GPU scribble
  604. * to or read from a page being used for something else causes no end
  605. * of fun.
  606. *
  607. * The GPU should be reset by this point, but assume the worst just
  608. * in case we aborted before completely initialising the engines.
  609. */
  610. GEM_BUG_ON(intel_gt_pm_is_awake(gt));
  611. if (!intel_gt_gpu_reset_clobbers_display(gt))
  612. intel_gt_reset_all_engines(gt);
  613. /* Decouple the backend; but keep the layout for late GPU resets */
  614. for_each_engine(engine, gt, id) {
  615. if (!engine->release)
  616. continue;
  617. intel_wakeref_wait_for_idle(&engine->wakeref);
  618. GEM_BUG_ON(intel_engine_pm_is_awake(engine));
  619. engine->release(engine);
  620. engine->release = NULL;
  621. memset(&engine->reset, 0, sizeof(engine->reset));
  622. }
  623. llist_del_all(&gt->i915->uabi_engines_llist);
  624. }
  625. void intel_engine_free_request_pool(struct intel_engine_cs *engine)
  626. {
  627. if (!engine->request_pool)
  628. return;
  629. kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
  630. }
  631. void intel_engines_free(struct intel_gt *gt)
  632. {
  633. struct intel_engine_cs *engine;
  634. enum intel_engine_id id;
  635. /* Free the requests! dma-resv keeps fences around for an eternity */
  636. rcu_barrier();
  637. for_each_engine(engine, gt, id) {
  638. intel_engine_free_request_pool(engine);
  639. kfree(engine);
  640. gt->engine[id] = NULL;
  641. }
  642. }
  643. static
  644. bool gen11_vdbox_has_sfc(struct intel_gt *gt,
  645. unsigned int physical_vdbox,
  646. unsigned int logical_vdbox, u16 vdbox_mask)
  647. {
  648. struct drm_i915_private *i915 = gt->i915;
  649. /*
  650. * In Gen11, only even numbered logical VDBOXes are hooked
  651. * up to an SFC (Scaler & Format Converter) unit.
  652. * In Gen12, Even numbered physical instance always are connected
  653. * to an SFC. Odd numbered physical instances have SFC only if
  654. * previous even instance is fused off.
  655. *
  656. * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
  657. * in the fuse register that tells us whether a specific SFC is present.
  658. */
  659. if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
  660. return false;
  661. else if (MEDIA_VER(i915) >= 12)
  662. return (physical_vdbox % 2 == 0) ||
  663. !(BIT(physical_vdbox - 1) & vdbox_mask);
  664. else if (MEDIA_VER(i915) == 11)
  665. return logical_vdbox % 2 == 0;
  666. return false;
  667. }
  668. static void engine_mask_apply_media_fuses(struct intel_gt *gt)
  669. {
  670. struct drm_i915_private *i915 = gt->i915;
  671. unsigned int logical_vdbox = 0;
  672. unsigned int i;
  673. u32 media_fuse, fuse1;
  674. u16 vdbox_mask;
  675. u16 vebox_mask;
  676. if (MEDIA_VER(gt->i915) < 11)
  677. return;
  678. /*
  679. * On newer platforms the fusing register is called 'enable' and has
  680. * enable semantics, while on older platforms it is called 'disable'
  681. * and bits have disable semantices.
  682. */
  683. media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
  684. if (MEDIA_VER_FULL(i915) < IP_VER(12, 55))
  685. media_fuse = ~media_fuse;
  686. vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
  687. vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
  688. if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) {
  689. fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
  690. gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
  691. } else {
  692. gt->info.sfc_mask = ~0;
  693. }
  694. for (i = 0; i < I915_MAX_VCS; i++) {
  695. if (!HAS_ENGINE(gt, _VCS(i))) {
  696. vdbox_mask &= ~BIT(i);
  697. continue;
  698. }
  699. if (!(BIT(i) & vdbox_mask)) {
  700. gt->info.engine_mask &= ~BIT(_VCS(i));
  701. gt_dbg(gt, "vcs%u fused off\n", i);
  702. continue;
  703. }
  704. if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
  705. gt->info.vdbox_sfc_access |= BIT(i);
  706. logical_vdbox++;
  707. }
  708. gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
  709. GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
  710. for (i = 0; i < I915_MAX_VECS; i++) {
  711. if (!HAS_ENGINE(gt, _VECS(i))) {
  712. vebox_mask &= ~BIT(i);
  713. continue;
  714. }
  715. if (!(BIT(i) & vebox_mask)) {
  716. gt->info.engine_mask &= ~BIT(_VECS(i));
  717. gt_dbg(gt, "vecs%u fused off\n", i);
  718. }
  719. }
  720. gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
  721. GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
  722. }
  723. static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
  724. {
  725. struct drm_i915_private *i915 = gt->i915;
  726. struct intel_gt_info *info = &gt->info;
  727. int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
  728. unsigned long ccs_mask;
  729. unsigned int i;
  730. if (GRAPHICS_VER(i915) < 11)
  731. return;
  732. if (hweight32(CCS_MASK(gt)) <= 1)
  733. return;
  734. ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
  735. ss_per_ccs);
  736. /*
  737. * If all DSS in a quadrant are fused off, the corresponding CCS
  738. * engine is not available for use.
  739. */
  740. for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
  741. info->engine_mask &= ~BIT(_CCS(i));
  742. gt_dbg(gt, "ccs%u fused off\n", i);
  743. }
  744. }
  745. /*
  746. * Determine which engines are fused off in our particular hardware.
  747. * Note that we have a catch-22 situation where we need to be able to access
  748. * the blitter forcewake domain to read the engine fuses, but at the same time
  749. * we need to know which engines are available on the system to know which
  750. * forcewake domains are present. We solve this by initializing the forcewake
  751. * domains based on the full engine mask in the platform capabilities before
  752. * calling this function and pruning the domains for fused-off engines
  753. * afterwards.
  754. */
  755. static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
  756. {
  757. struct intel_gt_info *info = &gt->info;
  758. GEM_BUG_ON(!info->engine_mask);
  759. engine_mask_apply_media_fuses(gt);
  760. engine_mask_apply_compute_fuses(gt);
  761. /*
  762. * The only use of the GSC CS is to load and communicate with the GSC
  763. * FW, so we have no use for it if we don't have the FW.
  764. *
  765. * IMPORTANT: in cases where we don't have the GSC FW, we have a
  766. * catch-22 situation that breaks media C6 due to 2 requirements:
  767. * 1) once turned on, the GSC power well will not go to sleep unless the
  768. * GSC FW is loaded.
  769. * 2) to enable idling (which is required for media C6) we need to
  770. * initialize the IDLE_MSG register for the GSC CS and do at least 1
  771. * submission, which will wake up the GSC power well.
  772. */
  773. if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
  774. gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
  775. info->engine_mask &= ~BIT(GSC0);
  776. }
  777. /*
  778. * Do not create the command streamer for CCS slices beyond the first.
  779. * All the workload submitted to the first engine will be shared among
  780. * all the slices.
  781. *
  782. * Once the user will be allowed to customize the CCS mode, then this
  783. * check needs to be removed.
  784. */
  785. if (IS_DG2(gt->i915)) {
  786. u8 first_ccs = __ffs(CCS_MASK(gt));
  787. /*
  788. * Store the number of active cslices before
  789. * changing the CCS engine configuration
  790. */
  791. gt->ccs.cslices = CCS_MASK(gt);
  792. /* Mask off all the CCS engine */
  793. info->engine_mask &= ~GENMASK(CCS3, CCS0);
  794. /* Put back in the first CCS engine */
  795. info->engine_mask |= BIT(_CCS(first_ccs));
  796. }
  797. return info->engine_mask;
  798. }
  799. static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
  800. u8 class, const u8 *map, u8 num_instances)
  801. {
  802. int i, j;
  803. u8 current_logical_id = 0;
  804. for (j = 0; j < num_instances; ++j) {
  805. for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
  806. if (!HAS_ENGINE(gt, i) ||
  807. intel_engines[i].class != class)
  808. continue;
  809. if (intel_engines[i].instance == map[j]) {
  810. logical_ids[intel_engines[i].instance] =
  811. current_logical_id++;
  812. break;
  813. }
  814. }
  815. }
  816. }
  817. static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
  818. {
  819. /*
  820. * Logical to physical mapping is needed for proper support
  821. * to split-frame feature.
  822. */
  823. if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) {
  824. const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
  825. populate_logical_ids(gt, logical_ids, class,
  826. map, ARRAY_SIZE(map));
  827. } else {
  828. int i;
  829. u8 map[MAX_ENGINE_INSTANCE + 1];
  830. for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
  831. map[i] = i;
  832. populate_logical_ids(gt, logical_ids, class,
  833. map, ARRAY_SIZE(map));
  834. }
  835. }
  836. /**
  837. * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  838. * @gt: pointer to struct intel_gt
  839. *
  840. * Return: non-zero if the initialization failed.
  841. */
  842. int intel_engines_init_mmio(struct intel_gt *gt)
  843. {
  844. struct drm_i915_private *i915 = gt->i915;
  845. const unsigned int engine_mask = init_engine_mask(gt);
  846. unsigned int mask = 0;
  847. unsigned int i, class;
  848. u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
  849. int err;
  850. drm_WARN_ON(&i915->drm, engine_mask == 0);
  851. drm_WARN_ON(&i915->drm, engine_mask &
  852. GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
  853. for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
  854. setup_logical_ids(gt, logical_ids, class);
  855. for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
  856. u8 instance = intel_engines[i].instance;
  857. if (intel_engines[i].class != class ||
  858. !HAS_ENGINE(gt, i))
  859. continue;
  860. err = intel_engine_setup(gt, i,
  861. logical_ids[instance]);
  862. if (err)
  863. goto cleanup;
  864. mask |= BIT(i);
  865. }
  866. }
  867. /*
  868. * Catch failures to update intel_engines table when the new engines
  869. * are added to the driver by a warning and disabling the forgotten
  870. * engines.
  871. */
  872. if (drm_WARN_ON(&i915->drm, mask != engine_mask))
  873. gt->info.engine_mask = mask;
  874. gt->info.num_engines = hweight32(mask);
  875. intel_gt_check_and_clear_faults(gt);
  876. intel_setup_engine_capabilities(gt);
  877. intel_uncore_prune_engine_fw_domains(gt->uncore, gt);
  878. return 0;
  879. cleanup:
  880. intel_engines_free(gt);
  881. return err;
  882. }
  883. ALLOW_ERROR_INJECTION(intel_engines_init_mmio, ERRNO);
  884. void intel_engine_init_execlists(struct intel_engine_cs *engine)
  885. {
  886. struct intel_engine_execlists * const execlists = &engine->execlists;
  887. execlists->port_mask = 1;
  888. GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
  889. GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
  890. memset(execlists->pending, 0, sizeof(execlists->pending));
  891. execlists->active =
  892. memset(execlists->inflight, 0, sizeof(execlists->inflight));
  893. }
  894. static void cleanup_status_page(struct intel_engine_cs *engine)
  895. {
  896. struct i915_vma *vma;
  897. /* Prevent writes into HWSP after returning the page to the system */
  898. intel_engine_set_hwsp_writemask(engine, ~0u);
  899. vma = fetch_and_zero(&engine->status_page.vma);
  900. if (!vma)
  901. return;
  902. if (!HWS_NEEDS_PHYSICAL(engine->i915))
  903. i915_vma_unpin(vma);
  904. i915_gem_object_unpin_map(vma->obj);
  905. i915_gem_object_put(vma->obj);
  906. }
  907. static int pin_ggtt_status_page(struct intel_engine_cs *engine,
  908. struct i915_gem_ww_ctx *ww,
  909. struct i915_vma *vma)
  910. {
  911. unsigned int flags;
  912. if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
  913. /*
  914. * On g33, we cannot place HWS above 256MiB, so
  915. * restrict its pinning to the low mappable arena.
  916. * Though this restriction is not documented for
  917. * gen4, gen5, or byt, they also behave similarly
  918. * and hang if the HWS is placed at the top of the
  919. * GTT. To generalise, it appears that all !llc
  920. * platforms have issues with us placing the HWS
  921. * above the mappable region (even though we never
  922. * actually map it).
  923. */
  924. flags = PIN_MAPPABLE;
  925. else
  926. flags = PIN_HIGH;
  927. return i915_ggtt_pin(vma, ww, 0, flags);
  928. }
  929. static int init_status_page(struct intel_engine_cs *engine)
  930. {
  931. struct drm_i915_gem_object *obj;
  932. struct i915_gem_ww_ctx ww;
  933. struct i915_vma *vma;
  934. void *vaddr;
  935. int ret;
  936. INIT_LIST_HEAD(&engine->status_page.timelines);
  937. /*
  938. * Though the HWS register does support 36bit addresses, historically
  939. * we have had hangs and corruption reported due to wild writes if
  940. * the HWS is placed above 4G. We only allow objects to be allocated
  941. * in GFP_DMA32 for i965, and no earlier physical address users had
  942. * access to more than 4G.
  943. */
  944. obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
  945. if (IS_ERR(obj)) {
  946. gt_err(engine->gt, "Failed to allocate status page\n");
  947. return PTR_ERR(obj);
  948. }
  949. i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
  950. vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
  951. if (IS_ERR(vma)) {
  952. ret = PTR_ERR(vma);
  953. goto err_put;
  954. }
  955. i915_gem_ww_ctx_init(&ww, true);
  956. retry:
  957. ret = i915_gem_object_lock(obj, &ww);
  958. if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915))
  959. ret = pin_ggtt_status_page(engine, &ww, vma);
  960. if (ret)
  961. goto err;
  962. vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
  963. if (IS_ERR(vaddr)) {
  964. ret = PTR_ERR(vaddr);
  965. goto err_unpin;
  966. }
  967. engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
  968. engine->status_page.vma = vma;
  969. err_unpin:
  970. if (ret)
  971. i915_vma_unpin(vma);
  972. err:
  973. if (ret == -EDEADLK) {
  974. ret = i915_gem_ww_ctx_backoff(&ww);
  975. if (!ret)
  976. goto retry;
  977. }
  978. i915_gem_ww_ctx_fini(&ww);
  979. err_put:
  980. if (ret)
  981. i915_gem_object_put(obj);
  982. return ret;
  983. }
  984. static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
  985. {
  986. static const union intel_engine_tlb_inv_reg gen8_regs[] = {
  987. [RENDER_CLASS].reg = GEN8_RTCR,
  988. [VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, /* , GEN8_M2TCR */
  989. [VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR,
  990. [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
  991. };
  992. static const union intel_engine_tlb_inv_reg gen12_regs[] = {
  993. [RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR,
  994. [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
  995. [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
  996. [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
  997. [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
  998. };
  999. static const union intel_engine_tlb_inv_reg xehp_regs[] = {
  1000. [RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR,
  1001. [VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR,
  1002. [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
  1003. [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR,
  1004. [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR,
  1005. };
  1006. static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
  1007. [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
  1008. [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
  1009. [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
  1010. };
  1011. struct drm_i915_private *i915 = engine->i915;
  1012. const unsigned int instance = engine->instance;
  1013. const unsigned int class = engine->class;
  1014. const union intel_engine_tlb_inv_reg *regs;
  1015. union intel_engine_tlb_inv_reg reg;
  1016. unsigned int num = 0;
  1017. u32 val;
  1018. /*
  1019. * New platforms should not be added with catch-all-newer (>=)
  1020. * condition so that any later platform added triggers the below warning
  1021. * and in turn mandates a human cross-check of whether the invalidation
  1022. * flows have compatible semantics.
  1023. *
  1024. * For instance with the 11.00 -> 12.00 transition three out of five
  1025. * respective engine registers were moved to masked type. Then after the
  1026. * 12.00 -> 12.50 transition multi cast handling is required too.
  1027. */
  1028. if (engine->gt->type == GT_MEDIA) {
  1029. if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
  1030. regs = xelpmp_regs;
  1031. num = ARRAY_SIZE(xelpmp_regs);
  1032. }
  1033. } else {
  1034. if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) ||
  1035. GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
  1036. GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
  1037. GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
  1038. regs = xehp_regs;
  1039. num = ARRAY_SIZE(xehp_regs);
  1040. } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
  1041. GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
  1042. regs = gen12_regs;
  1043. num = ARRAY_SIZE(gen12_regs);
  1044. } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
  1045. regs = gen8_regs;
  1046. num = ARRAY_SIZE(gen8_regs);
  1047. } else if (GRAPHICS_VER(i915) < 8) {
  1048. return 0;
  1049. }
  1050. }
  1051. if (gt_WARN_ONCE(engine->gt, !num,
  1052. "Platform does not implement TLB invalidation!"))
  1053. return -ENODEV;
  1054. if (gt_WARN_ON_ONCE(engine->gt,
  1055. class >= num ||
  1056. (!regs[class].reg.reg &&
  1057. !regs[class].mcr_reg.reg)))
  1058. return -ERANGE;
  1059. reg = regs[class];
  1060. if (regs == xelpmp_regs && class == OTHER_CLASS) {
  1061. /*
  1062. * There's only a single GSC instance, but it uses register bit
  1063. * 1 instead of either 0 or OTHER_GSC_INSTANCE.
  1064. */
  1065. GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
  1066. val = 1;
  1067. } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
  1068. reg.reg = GEN8_M2TCR;
  1069. val = 0;
  1070. } else {
  1071. val = instance;
  1072. }
  1073. val = BIT(val);
  1074. engine->tlb_inv.mcr = regs == xehp_regs;
  1075. engine->tlb_inv.reg = reg;
  1076. engine->tlb_inv.done = val;
  1077. if (GRAPHICS_VER(i915) >= 12 &&
  1078. (engine->class == VIDEO_DECODE_CLASS ||
  1079. engine->class == VIDEO_ENHANCEMENT_CLASS ||
  1080. engine->class == COMPUTE_CLASS ||
  1081. engine->class == OTHER_CLASS))
  1082. engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
  1083. else
  1084. engine->tlb_inv.request = val;
  1085. return 0;
  1086. }
  1087. static int engine_setup_common(struct intel_engine_cs *engine)
  1088. {
  1089. int err;
  1090. init_llist_head(&engine->barrier_tasks);
  1091. err = intel_engine_init_tlb_invalidation(engine);
  1092. if (err)
  1093. return err;
  1094. err = init_status_page(engine);
  1095. if (err)
  1096. return err;
  1097. engine->breadcrumbs = intel_breadcrumbs_create(engine);
  1098. if (!engine->breadcrumbs) {
  1099. err = -ENOMEM;
  1100. goto err_status;
  1101. }
  1102. engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL);
  1103. if (!engine->sched_engine) {
  1104. err = -ENOMEM;
  1105. goto err_sched_engine;
  1106. }
  1107. engine->sched_engine->private_data = engine;
  1108. err = intel_engine_init_cmd_parser(engine);
  1109. if (err)
  1110. goto err_cmd_parser;
  1111. intel_engine_init_execlists(engine);
  1112. intel_engine_init__pm(engine);
  1113. intel_engine_init_retire(engine);
  1114. /* Use the whole device by default */
  1115. engine->sseu =
  1116. intel_sseu_from_device_info(&engine->gt->info.sseu);
  1117. intel_engine_init_workarounds(engine);
  1118. intel_engine_init_whitelist(engine);
  1119. intel_engine_init_ctx_wa(engine);
  1120. if (GRAPHICS_VER(engine->i915) >= 12)
  1121. engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
  1122. return 0;
  1123. err_cmd_parser:
  1124. i915_sched_engine_put(engine->sched_engine);
  1125. err_sched_engine:
  1126. intel_breadcrumbs_put(engine->breadcrumbs);
  1127. err_status:
  1128. cleanup_status_page(engine);
  1129. return err;
  1130. }
  1131. struct measure_breadcrumb {
  1132. struct i915_request rq;
  1133. struct intel_ring ring;
  1134. u32 cs[2048];
  1135. };
  1136. static int measure_breadcrumb_dw(struct intel_context *ce)
  1137. {
  1138. struct intel_engine_cs *engine = ce->engine;
  1139. struct measure_breadcrumb *frame;
  1140. int dw;
  1141. GEM_BUG_ON(!engine->gt->scratch);
  1142. frame = kzalloc_obj(*frame);
  1143. if (!frame)
  1144. return -ENOMEM;
  1145. frame->rq.i915 = engine->i915;
  1146. frame->rq.engine = engine;
  1147. frame->rq.context = ce;
  1148. rcu_assign_pointer(frame->rq.timeline, ce->timeline);
  1149. frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;
  1150. frame->ring.vaddr = frame->cs;
  1151. frame->ring.size = sizeof(frame->cs);
  1152. frame->ring.wrap =
  1153. BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
  1154. frame->ring.effective_size = frame->ring.size;
  1155. intel_ring_update_space(&frame->ring);
  1156. frame->rq.ring = &frame->ring;
  1157. mutex_lock(&ce->timeline->mutex);
  1158. spin_lock_irq(&engine->sched_engine->lock);
  1159. dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
  1160. spin_unlock_irq(&engine->sched_engine->lock);
  1161. mutex_unlock(&ce->timeline->mutex);
  1162. GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
  1163. kfree(frame);
  1164. return dw;
  1165. }
  1166. struct intel_context *
  1167. intel_engine_create_pinned_context(struct intel_engine_cs *engine,
  1168. struct i915_address_space *vm,
  1169. unsigned int ring_size,
  1170. unsigned int hwsp,
  1171. struct lock_class_key *key,
  1172. const char *name)
  1173. {
  1174. struct intel_context *ce;
  1175. int err;
  1176. ce = intel_context_create(engine);
  1177. if (IS_ERR(ce))
  1178. return ce;
  1179. __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
  1180. ce->timeline = page_pack_bits(NULL, hwsp);
  1181. ce->ring = NULL;
  1182. ce->ring_size = ring_size;
  1183. i915_vm_put(ce->vm);
  1184. ce->vm = i915_vm_get(vm);
  1185. err = intel_context_pin(ce); /* perma-pin so it is always available */
  1186. if (err) {
  1187. intel_context_put(ce);
  1188. return ERR_PTR(err);
  1189. }
  1190. list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
  1191. /*
  1192. * Give our perma-pinned kernel timelines a separate lockdep class,
  1193. * so that we can use them from within the normal user timelines
  1194. * should we need to inject GPU operations during their request
  1195. * construction.
  1196. */
  1197. lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
  1198. return ce;
  1199. }
  1200. void intel_engine_destroy_pinned_context(struct intel_context *ce)
  1201. {
  1202. struct intel_engine_cs *engine = ce->engine;
  1203. struct i915_vma *hwsp = engine->status_page.vma;
  1204. GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);
  1205. mutex_lock(&hwsp->vm->mutex);
  1206. list_del(&ce->timeline->engine_link);
  1207. mutex_unlock(&hwsp->vm->mutex);
  1208. list_del(&ce->pinned_contexts_link);
  1209. intel_context_unpin(ce);
  1210. intel_context_put(ce);
  1211. }
  1212. static struct intel_context *
  1213. create_ggtt_bind_context(struct intel_engine_cs *engine)
  1214. {
  1215. static struct lock_class_key kernel;
  1216. /*
  1217. * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple
  1218. * bind requests at a time so get a bigger ring.
  1219. */
  1220. return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
  1221. I915_GEM_HWS_GGTT_BIND_ADDR,
  1222. &kernel, "ggtt_bind_context");
  1223. }
  1224. static struct intel_context *
  1225. create_kernel_context(struct intel_engine_cs *engine)
  1226. {
  1227. static struct lock_class_key kernel;
  1228. return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
  1229. I915_GEM_HWS_SEQNO_ADDR,
  1230. &kernel, "kernel_context");
  1231. }
  1232. /*
  1233. * engine_init_common - initialize engine state which might require hw access
  1234. * @engine: Engine to initialize.
  1235. *
  1236. * Initializes @engine@ structure members shared between legacy and execlists
  1237. * submission modes which do require hardware access.
  1238. *
  1239. * Typcally done at later stages of submission mode specific engine setup.
  1240. *
  1241. * Returns zero on success or an error code on failure.
  1242. */
  1243. static int engine_init_common(struct intel_engine_cs *engine)
  1244. {
  1245. struct intel_context *ce, *bce = NULL;
  1246. int ret;
  1247. engine->set_default_submission(engine);
  1248. /*
  1249. * We may need to do things with the shrinker which
  1250. * require us to immediately switch back to the default
  1251. * context. This can cause a problem as pinning the
  1252. * default context also requires GTT space which may not
  1253. * be available. To avoid this we always pin the default
  1254. * context.
  1255. */
  1256. ce = create_kernel_context(engine);
  1257. if (IS_ERR(ce))
  1258. return PTR_ERR(ce);
  1259. /*
  1260. * Create a separate pinned context for GGTT update with blitter engine
  1261. * if a platform require such service. MI_UPDATE_GTT works on other
  1262. * engines as well but BCS should be less busy engine so pick that for
  1263. * GGTT updates.
  1264. */
  1265. if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) {
  1266. bce = create_ggtt_bind_context(engine);
  1267. if (IS_ERR(bce)) {
  1268. ret = PTR_ERR(bce);
  1269. goto err_ce_context;
  1270. }
  1271. }
  1272. ret = measure_breadcrumb_dw(ce);
  1273. if (ret < 0)
  1274. goto err_bce_context;
  1275. engine->emit_fini_breadcrumb_dw = ret;
  1276. engine->kernel_context = ce;
  1277. engine->bind_context = bce;
  1278. return 0;
  1279. err_bce_context:
  1280. if (bce)
  1281. intel_engine_destroy_pinned_context(bce);
  1282. err_ce_context:
  1283. intel_engine_destroy_pinned_context(ce);
  1284. return ret;
  1285. }
  1286. int intel_engines_init(struct intel_gt *gt)
  1287. {
  1288. int (*setup)(struct intel_engine_cs *engine);
  1289. struct intel_engine_cs *engine;
  1290. enum intel_engine_id id;
  1291. int err;
  1292. if (intel_uc_uses_guc_submission(&gt->uc)) {
  1293. gt->submission_method = INTEL_SUBMISSION_GUC;
  1294. setup = intel_guc_submission_setup;
  1295. } else if (HAS_EXECLISTS(gt->i915)) {
  1296. gt->submission_method = INTEL_SUBMISSION_ELSP;
  1297. setup = intel_execlists_submission_setup;
  1298. } else {
  1299. gt->submission_method = INTEL_SUBMISSION_RING;
  1300. setup = intel_ring_submission_setup;
  1301. }
  1302. for_each_engine(engine, gt, id) {
  1303. err = engine_setup_common(engine);
  1304. if (err)
  1305. return err;
  1306. err = setup(engine);
  1307. if (err) {
  1308. intel_engine_cleanup_common(engine);
  1309. return err;
  1310. }
  1311. /* The backend should now be responsible for cleanup */
  1312. GEM_BUG_ON(engine->release == NULL);
  1313. err = engine_init_common(engine);
  1314. if (err)
  1315. return err;
  1316. intel_engine_add_user(engine);
  1317. }
  1318. return 0;
  1319. }
  1320. /**
  1321. * intel_engine_cleanup_common - cleans up the engine state created by
  1322. * the common initializers.
  1323. * @engine: Engine to cleanup.
  1324. *
  1325. * This cleans up everything created by the common helpers.
  1326. */
  1327. void intel_engine_cleanup_common(struct intel_engine_cs *engine)
  1328. {
  1329. GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
  1330. i915_sched_engine_put(engine->sched_engine);
  1331. intel_breadcrumbs_put(engine->breadcrumbs);
  1332. intel_engine_fini_retire(engine);
  1333. intel_engine_cleanup_cmd_parser(engine);
  1334. if (engine->default_state)
  1335. fput(engine->default_state);
  1336. if (engine->kernel_context)
  1337. intel_engine_destroy_pinned_context(engine->kernel_context);
  1338. if (engine->bind_context)
  1339. intel_engine_destroy_pinned_context(engine->bind_context);
  1340. GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
  1341. cleanup_status_page(engine);
  1342. intel_wa_list_free(&engine->ctx_wa_list);
  1343. intel_wa_list_free(&engine->wa_list);
  1344. intel_wa_list_free(&engine->whitelist);
  1345. }
  1346. /**
  1347. * intel_engine_resume - re-initializes the HW state of the engine
  1348. * @engine: Engine to resume.
  1349. *
  1350. * Returns zero on success or an error code on failure.
  1351. */
  1352. int intel_engine_resume(struct intel_engine_cs *engine)
  1353. {
  1354. intel_engine_apply_workarounds(engine);
  1355. intel_engine_apply_whitelist(engine);
  1356. return engine->resume(engine);
  1357. }
  1358. u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
  1359. {
  1360. struct drm_i915_private *i915 = engine->i915;
  1361. u64 acthd;
  1362. if (GRAPHICS_VER(i915) >= 8)
  1363. acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
  1364. else if (GRAPHICS_VER(i915) >= 4)
  1365. acthd = ENGINE_READ(engine, RING_ACTHD);
  1366. else
  1367. acthd = ENGINE_READ(engine, ACTHD);
  1368. return acthd;
  1369. }
  1370. u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
  1371. {
  1372. u64 bbaddr;
  1373. if (GRAPHICS_VER(engine->i915) >= 8)
  1374. bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
  1375. else
  1376. bbaddr = ENGINE_READ(engine, RING_BBADDR);
  1377. return bbaddr;
  1378. }
  1379. static unsigned long stop_timeout(const struct intel_engine_cs *engine)
  1380. {
  1381. if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
  1382. return 0;
  1383. /*
  1384. * If we are doing a normal GPU reset, we can take our time and allow
  1385. * the engine to quiesce. We've stopped submission to the engine, and
  1386. * if we wait long enough an innocent context should complete and
  1387. * leave the engine idle. So they should not be caught unaware by
  1388. * the forthcoming GPU reset (which usually follows the stop_cs)!
  1389. */
  1390. return READ_ONCE(engine->props.stop_timeout_ms);
  1391. }
  1392. static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
  1393. int fast_timeout_us,
  1394. int slow_timeout_ms)
  1395. {
  1396. struct intel_uncore *uncore = engine->uncore;
  1397. const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
  1398. int err;
  1399. intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
  1400. /*
  1401. * Wa_22011802037: Prior to doing a reset, ensure CS is
  1402. * stopped, set ring stop bit and prefetch disable bit to halt CS
  1403. */
  1404. if (intel_engine_reset_needs_wa_22011802037(engine->gt))
  1405. intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
  1406. _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
  1407. err = __intel_wait_for_register_fw(engine->uncore, mode,
  1408. MODE_IDLE, MODE_IDLE,
  1409. fast_timeout_us,
  1410. slow_timeout_ms,
  1411. NULL);
  1412. /* A final mmio read to let GPU writes be hopefully flushed to memory */
  1413. intel_uncore_posting_read_fw(uncore, mode);
  1414. return err;
  1415. }
  1416. int intel_engine_stop_cs(struct intel_engine_cs *engine)
  1417. {
  1418. int err = 0;
  1419. if (GRAPHICS_VER(engine->i915) < 3)
  1420. return -ENODEV;
  1421. ENGINE_TRACE(engine, "\n");
  1422. /*
  1423. * TODO: Find out why occasionally stopping the CS times out. Seen
  1424. * especially with gem_eio tests.
  1425. *
  1426. * Occasionally trying to stop the cs times out, but does not adversely
  1427. * affect functionality. The timeout is set as a config parameter that
  1428. * defaults to 100ms. In most cases the follow up operation is to wait
  1429. * for pending MI_FORCE_WAKES. The assumption is that this timeout is
  1430. * sufficient for any pending MI_FORCEWAKEs to complete. Once root
  1431. * caused, the caller must check and handle the return from this
  1432. * function.
  1433. */
  1434. if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
  1435. ENGINE_TRACE(engine,
  1436. "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
  1437. ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
  1438. ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);
  1439. /*
  1440. * Sometimes we observe that the idle flag is not
  1441. * set even though the ring is empty. So double
  1442. * check before giving up.
  1443. */
  1444. if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
  1445. (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
  1446. err = -ETIMEDOUT;
  1447. }
  1448. return err;
  1449. }
  1450. void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
  1451. {
  1452. ENGINE_TRACE(engine, "\n");
  1453. ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
  1454. }
  1455. static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
  1456. {
  1457. static const i915_reg_t _reg[I915_NUM_ENGINES] = {
  1458. [RCS0] = MSG_IDLE_CS,
  1459. [BCS0] = MSG_IDLE_BCS,
  1460. [VCS0] = MSG_IDLE_VCS0,
  1461. [VCS1] = MSG_IDLE_VCS1,
  1462. [VCS2] = MSG_IDLE_VCS2,
  1463. [VCS3] = MSG_IDLE_VCS3,
  1464. [VCS4] = MSG_IDLE_VCS4,
  1465. [VCS5] = MSG_IDLE_VCS5,
  1466. [VCS6] = MSG_IDLE_VCS6,
  1467. [VCS7] = MSG_IDLE_VCS7,
  1468. [VECS0] = MSG_IDLE_VECS0,
  1469. [VECS1] = MSG_IDLE_VECS1,
  1470. [VECS2] = MSG_IDLE_VECS2,
  1471. [VECS3] = MSG_IDLE_VECS3,
  1472. [CCS0] = MSG_IDLE_CS,
  1473. [CCS1] = MSG_IDLE_CS,
  1474. [CCS2] = MSG_IDLE_CS,
  1475. [CCS3] = MSG_IDLE_CS,
  1476. };
  1477. u32 val;
  1478. if (!_reg[engine->id].reg)
  1479. return 0;
  1480. val = intel_uncore_read(engine->uncore, _reg[engine->id]);
  1481. /* bits[29:25] & bits[13:9] >> shift */
  1482. return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
  1483. }
  1484. static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
  1485. {
  1486. int ret;
  1487. /* Ensure GPM receives fw up/down after CS is stopped */
  1488. udelay(1);
  1489. /* Wait for forcewake request to complete in GPM */
  1490. ret = __intel_wait_for_register_fw(gt->uncore,
  1491. GEN9_PWRGT_DOMAIN_STATUS,
  1492. fw_mask, fw_mask, 5000, 0, NULL);
  1493. /* Ensure CS receives fw ack from GPM */
  1494. udelay(1);
  1495. if (ret)
  1496. GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
  1497. }
  1498. /*
  1499. * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
  1500. * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
  1501. * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
  1502. * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
  1503. * are concerned only with the gt reset here, we use a logical OR of pending
  1504. * forcewakeups from all reset domains and then wait for them to complete by
  1505. * querying PWRGT_DOMAIN_STATUS.
  1506. */
  1507. void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
  1508. {
  1509. u32 fw_pending = __cs_pending_mi_force_wakes(engine);
  1510. if (fw_pending)
  1511. __gpm_wait_for_fw_complete(engine->gt, fw_pending);
  1512. }
  1513. /* NB: please notice the memset */
  1514. void intel_engine_get_instdone(const struct intel_engine_cs *engine,
  1515. struct intel_instdone *instdone)
  1516. {
  1517. struct drm_i915_private *i915 = engine->i915;
  1518. struct intel_uncore *uncore = engine->uncore;
  1519. u32 mmio_base = engine->mmio_base;
  1520. int slice;
  1521. int subslice;
  1522. int iter;
  1523. memset(instdone, 0, sizeof(*instdone));
  1524. if (GRAPHICS_VER(i915) >= 8) {
  1525. instdone->instdone =
  1526. intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
  1527. if (engine->id != RCS0)
  1528. return;
  1529. instdone->slice_common =
  1530. intel_uncore_read(uncore, GEN7_SC_INSTDONE);
  1531. if (GRAPHICS_VER(i915) >= 12) {
  1532. instdone->slice_common_extra[0] =
  1533. intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
  1534. instdone->slice_common_extra[1] =
  1535. intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
  1536. }
  1537. for_each_ss_steering(iter, engine->gt, slice, subslice) {
  1538. instdone->sampler[slice][subslice] =
  1539. intel_gt_mcr_read(engine->gt,
  1540. GEN8_SAMPLER_INSTDONE,
  1541. slice, subslice);
  1542. instdone->row[slice][subslice] =
  1543. intel_gt_mcr_read(engine->gt,
  1544. GEN8_ROW_INSTDONE,
  1545. slice, subslice);
  1546. }
  1547. if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
  1548. for_each_ss_steering(iter, engine->gt, slice, subslice)
  1549. instdone->geom_svg[slice][subslice] =
  1550. intel_gt_mcr_read(engine->gt,
  1551. XEHPG_INSTDONE_GEOM_SVG,
  1552. slice, subslice);
  1553. }
  1554. } else if (GRAPHICS_VER(i915) >= 7) {
  1555. instdone->instdone =
  1556. intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
  1557. if (engine->id != RCS0)
  1558. return;
  1559. instdone->slice_common =
  1560. intel_uncore_read(uncore, GEN7_SC_INSTDONE);
  1561. instdone->sampler[0][0] =
  1562. intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
  1563. instdone->row[0][0] =
  1564. intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
  1565. } else if (GRAPHICS_VER(i915) >= 4) {
  1566. instdone->instdone =
  1567. intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
  1568. if (engine->id == RCS0)
  1569. /* HACK: Using the wrong struct member */
  1570. instdone->slice_common =
  1571. intel_uncore_read(uncore, GEN4_INSTDONE1);
  1572. } else {
  1573. instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
  1574. }
  1575. }
  1576. static bool ring_is_idle(struct intel_engine_cs *engine)
  1577. {
  1578. bool idle = true;
  1579. if (I915_SELFTEST_ONLY(!engine->mmio_base))
  1580. return true;
  1581. if (!intel_engine_pm_get_if_awake(engine))
  1582. return true;
  1583. /* First check that no commands are left in the ring */
  1584. if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
  1585. (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
  1586. idle = false;
  1587. /* No bit for gen2, so assume the CS parser is idle */
  1588. if (GRAPHICS_VER(engine->i915) > 2 &&
  1589. !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
  1590. idle = false;
  1591. intel_engine_pm_put(engine);
  1592. return idle;
  1593. }
  1594. void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
  1595. {
  1596. struct tasklet_struct *t = &engine->sched_engine->tasklet;
  1597. if (!t->callback)
  1598. return;
  1599. local_bh_disable();
  1600. if (tasklet_trylock(t)) {
  1601. /* Must wait for any GPU reset in progress. */
  1602. if (__tasklet_is_enabled(t))
  1603. t->callback(t);
  1604. tasklet_unlock(t);
  1605. }
  1606. local_bh_enable();
  1607. /* Synchronise and wait for the tasklet on another CPU */
  1608. if (sync)
  1609. tasklet_unlock_wait(t);
  1610. }
  1611. /**
  1612. * intel_engine_is_idle() - Report if the engine has finished process all work
  1613. * @engine: the intel_engine_cs
  1614. *
  1615. * Return true if there are no requests pending, nothing left to be submitted
  1616. * to hardware, and that the engine is idle.
  1617. */
  1618. bool intel_engine_is_idle(struct intel_engine_cs *engine)
  1619. {
  1620. /* More white lies, if wedged, hw state is inconsistent */
  1621. if (intel_gt_is_wedged(engine->gt))
  1622. return true;
  1623. if (!intel_engine_pm_is_awake(engine))
  1624. return true;
  1625. /* Waiting to drain ELSP? */
  1626. intel_synchronize_hardirq(engine->i915);
  1627. intel_engine_flush_submission(engine);
  1628. /* ELSP is empty, but there are ready requests? E.g. after reset */
  1629. if (!i915_sched_engine_is_empty(engine->sched_engine))
  1630. return false;
  1631. /* Ring stopped? */
  1632. return ring_is_idle(engine);
  1633. }
  1634. bool intel_engines_are_idle(struct intel_gt *gt)
  1635. {
  1636. struct intel_engine_cs *engine;
  1637. enum intel_engine_id id;
  1638. /*
  1639. * If the driver is wedged, HW state may be very inconsistent and
  1640. * report that it is still busy, even though we have stopped using it.
  1641. */
  1642. if (intel_gt_is_wedged(gt))
  1643. return true;
  1644. /* Already parked (and passed an idleness test); must still be idle */
  1645. if (!READ_ONCE(gt->awake))
  1646. return true;
  1647. for_each_engine(engine, gt, id) {
  1648. if (!intel_engine_is_idle(engine))
  1649. return false;
  1650. }
  1651. return true;
  1652. }
  1653. bool intel_engine_irq_enable(struct intel_engine_cs *engine)
  1654. {
  1655. if (!engine->irq_enable)
  1656. return false;
  1657. /* Caller disables interrupts */
  1658. spin_lock(engine->gt->irq_lock);
  1659. engine->irq_enable(engine);
  1660. spin_unlock(engine->gt->irq_lock);
  1661. return true;
  1662. }
  1663. void intel_engine_irq_disable(struct intel_engine_cs *engine)
  1664. {
  1665. if (!engine->irq_disable)
  1666. return;
  1667. /* Caller disables interrupts */
  1668. spin_lock(engine->gt->irq_lock);
  1669. engine->irq_disable(engine);
  1670. spin_unlock(engine->gt->irq_lock);
  1671. }
  1672. void intel_engines_reset_default_submission(struct intel_gt *gt)
  1673. {
  1674. struct intel_engine_cs *engine;
  1675. enum intel_engine_id id;
  1676. for_each_engine(engine, gt, id) {
  1677. if (engine->sanitize)
  1678. engine->sanitize(engine);
  1679. if (engine->set_default_submission)
  1680. engine->set_default_submission(engine);
  1681. }
  1682. }
  1683. bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
  1684. {
  1685. switch (GRAPHICS_VER(engine->i915)) {
  1686. case 2:
  1687. return false; /* uses physical not virtual addresses */
  1688. case 3:
  1689. /* maybe only uses physical not virtual addresses */
  1690. return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
  1691. case 4:
  1692. return !IS_I965G(engine->i915); /* who knows! */
  1693. case 6:
  1694. return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
  1695. default:
  1696. return true;
  1697. }
  1698. }
  1699. static struct intel_timeline *get_timeline(struct i915_request *rq)
  1700. {
  1701. struct intel_timeline *tl;
  1702. /*
  1703. * Even though we are holding the engine->sched_engine->lock here, there
  1704. * is no control over the submission queue per-se and we are
  1705. * inspecting the active state at a random point in time, with an
  1706. * unknown queue. Play safe and make sure the timeline remains valid.
  1707. * (Only being used for pretty printing, one extra kref shouldn't
  1708. * cause a camel stampede!)
  1709. */
  1710. rcu_read_lock();
  1711. tl = rcu_dereference(rq->timeline);
  1712. if (!kref_get_unless_zero(&tl->kref))
  1713. tl = NULL;
  1714. rcu_read_unlock();
  1715. return tl;
  1716. }
  1717. static int print_ring(char *buf, int sz, struct i915_request *rq)
  1718. {
  1719. int len = 0;
  1720. if (!i915_request_signaled(rq)) {
  1721. struct intel_timeline *tl = get_timeline(rq);
  1722. len = scnprintf(buf, sz,
  1723. "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
  1724. i915_ggtt_offset(rq->ring->vma),
  1725. tl ? tl->hwsp_offset : 0,
  1726. hwsp_seqno(rq),
  1727. DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
  1728. 1000 * 1000));
  1729. if (tl)
  1730. intel_timeline_put(tl);
  1731. }
  1732. return len;
  1733. }
  1734. static void hexdump(struct drm_printer *m, const void *buf, size_t len)
  1735. {
  1736. const size_t rowsize = 8 * sizeof(u32);
  1737. const void *prev = NULL;
  1738. bool skip = false;
  1739. size_t pos;
  1740. for (pos = 0; pos < len; pos += rowsize) {
  1741. char line[128];
  1742. if (prev && !memcmp(prev, buf + pos, rowsize)) {
  1743. if (!skip) {
  1744. drm_printf(m, "*\n");
  1745. skip = true;
  1746. }
  1747. continue;
  1748. }
  1749. WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
  1750. rowsize, sizeof(u32),
  1751. line, sizeof(line),
  1752. false) >= sizeof(line));
  1753. drm_printf(m, "[%04zx] %s\n", pos, line);
  1754. prev = buf + pos;
  1755. skip = false;
  1756. }
  1757. }
  1758. static const char *repr_timer(const struct timer_list *t)
  1759. {
  1760. if (!READ_ONCE(t->expires))
  1761. return "inactive";
  1762. if (timer_pending(t))
  1763. return "active";
  1764. return "expired";
  1765. }
  1766. static void intel_engine_print_registers(struct intel_engine_cs *engine,
  1767. struct drm_printer *m)
  1768. {
  1769. struct drm_i915_private *i915 = engine->i915;
  1770. struct intel_engine_execlists * const execlists = &engine->execlists;
  1771. u64 addr;
  1772. if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, 4, 7))
  1773. drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
  1774. if (HAS_EXECLISTS(i915)) {
  1775. drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
  1776. ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
  1777. drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
  1778. ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
  1779. }
  1780. drm_printf(m, "\tRING_START: 0x%08x\n",
  1781. ENGINE_READ(engine, RING_START));
  1782. drm_printf(m, "\tRING_HEAD: 0x%08x\n",
  1783. ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
  1784. drm_printf(m, "\tRING_TAIL: 0x%08x\n",
  1785. ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
  1786. drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
  1787. ENGINE_READ(engine, RING_CTL),
  1788. ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
  1789. if (GRAPHICS_VER(engine->i915) > 2) {
  1790. drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
  1791. ENGINE_READ(engine, RING_MI_MODE),
  1792. ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
  1793. }
  1794. if (GRAPHICS_VER(i915) >= 6) {
  1795. drm_printf(m, "\tRING_IMR: 0x%08x\n",
  1796. ENGINE_READ(engine, RING_IMR));
  1797. drm_printf(m, "\tRING_ESR: 0x%08x\n",
  1798. ENGINE_READ(engine, RING_ESR));
  1799. drm_printf(m, "\tRING_EMR: 0x%08x\n",
  1800. ENGINE_READ(engine, RING_EMR));
  1801. drm_printf(m, "\tRING_EIR: 0x%08x\n",
  1802. ENGINE_READ(engine, RING_EIR));
  1803. }
  1804. addr = intel_engine_get_active_head(engine);
  1805. drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
  1806. upper_32_bits(addr), lower_32_bits(addr));
  1807. addr = intel_engine_get_last_batch_head(engine);
  1808. drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
  1809. upper_32_bits(addr), lower_32_bits(addr));
  1810. if (GRAPHICS_VER(i915) >= 8)
  1811. addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
  1812. else if (GRAPHICS_VER(i915) >= 4)
  1813. addr = ENGINE_READ(engine, RING_DMA_FADD);
  1814. else
  1815. addr = ENGINE_READ(engine, DMA_FADD_I8XX);
  1816. drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
  1817. upper_32_bits(addr), lower_32_bits(addr));
  1818. if (GRAPHICS_VER(i915) >= 4) {
  1819. drm_printf(m, "\tIPEIR: 0x%08x\n",
  1820. ENGINE_READ(engine, RING_IPEIR));
  1821. drm_printf(m, "\tIPEHR: 0x%08x\n",
  1822. ENGINE_READ(engine, RING_IPEHR));
  1823. } else {
  1824. drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
  1825. drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
  1826. }
  1827. if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) {
  1828. struct i915_request * const *port, *rq;
  1829. const u32 *hws =
  1830. &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
  1831. const u8 num_entries = execlists->csb_size;
  1832. unsigned int idx;
  1833. u8 read, write;
  1834. drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
  1835. str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)),
  1836. str_enabled_disabled(!atomic_read(&engine->sched_engine->tasklet.count)),
  1837. repr_timer(&engine->execlists.preempt),
  1838. repr_timer(&engine->execlists.timer));
  1839. read = execlists->csb_head;
  1840. write = READ_ONCE(*execlists->csb_write);
  1841. drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
  1842. ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
  1843. ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
  1844. read, write, num_entries);
  1845. if (read >= num_entries)
  1846. read = 0;
  1847. if (write >= num_entries)
  1848. write = 0;
  1849. if (read > write)
  1850. write += num_entries;
  1851. while (read < write) {
  1852. idx = ++read % num_entries;
  1853. drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
  1854. idx, hws[idx * 2], hws[idx * 2 + 1]);
  1855. }
  1856. i915_sched_engine_active_lock_bh(engine->sched_engine);
  1857. rcu_read_lock();
  1858. for (port = execlists->active; (rq = *port); port++) {
  1859. char hdr[160];
  1860. int len;
  1861. len = scnprintf(hdr, sizeof(hdr),
  1862. "\t\tActive[%d]: ccid:%08x%s%s, ",
  1863. (int)(port - execlists->active),
  1864. rq->context->lrc.ccid,
  1865. intel_context_is_closed(rq->context) ? "!" : "",
  1866. intel_context_is_banned(rq->context) ? "*" : "");
  1867. len += print_ring(hdr + len, sizeof(hdr) - len, rq);
  1868. scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
  1869. i915_request_show(m, rq, hdr, 0);
  1870. }
  1871. for (port = execlists->pending; (rq = *port); port++) {
  1872. char hdr[160];
  1873. int len;
  1874. len = scnprintf(hdr, sizeof(hdr),
  1875. "\t\tPending[%d]: ccid:%08x%s%s, ",
  1876. (int)(port - execlists->pending),
  1877. rq->context->lrc.ccid,
  1878. intel_context_is_closed(rq->context) ? "!" : "",
  1879. intel_context_is_banned(rq->context) ? "*" : "");
  1880. len += print_ring(hdr + len, sizeof(hdr) - len, rq);
  1881. scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
  1882. i915_request_show(m, rq, hdr, 0);
  1883. }
  1884. rcu_read_unlock();
  1885. i915_sched_engine_active_unlock_bh(engine->sched_engine);
  1886. } else if (GRAPHICS_VER(i915) > 6) {
  1887. drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
  1888. ENGINE_READ(engine, RING_PP_DIR_BASE));
  1889. drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
  1890. ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
  1891. drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
  1892. ENGINE_READ(engine, RING_PP_DIR_DCLV));
  1893. }
  1894. }
  1895. static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
  1896. {
  1897. struct i915_vma_resource *vma_res = rq->batch_res;
  1898. void *ring;
  1899. int size;
  1900. drm_printf(m,
  1901. "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
  1902. rq->head, rq->postfix, rq->tail,
  1903. vma_res ? upper_32_bits(vma_res->start) : ~0u,
  1904. vma_res ? lower_32_bits(vma_res->start) : ~0u);
  1905. size = rq->tail - rq->head;
  1906. if (rq->tail < rq->head)
  1907. size += rq->ring->size;
  1908. ring = kmalloc(size, GFP_ATOMIC);
  1909. if (ring) {
  1910. const void *vaddr = rq->ring->vaddr;
  1911. unsigned int head = rq->head;
  1912. unsigned int len = 0;
  1913. if (rq->tail < head) {
  1914. len = rq->ring->size - head;
  1915. memcpy(ring, vaddr + head, len);
  1916. head = 0;
  1917. }
  1918. memcpy(ring + len, vaddr + head, size - len);
  1919. hexdump(m, ring, size);
  1920. kfree(ring);
  1921. }
  1922. }
  /* Load the unsigned long stored @x bytes past @p. */
  static unsigned long read_ul(void *p, size_t x)
  {
  	unsigned long *slot = (unsigned long *)((char *)p + x);

  	return *slot;
  }
  1927. static void print_properties(struct intel_engine_cs *engine,
  1928. struct drm_printer *m)
  1929. {
  1930. static const struct pmap {
  1931. size_t offset;
  1932. const char *name;
  1933. } props[] = {
  1934. #define P(x) { \
  1935. .offset = offsetof(typeof(engine->props), x), \
  1936. .name = #x \
  1937. }
  1938. P(heartbeat_interval_ms),
  1939. P(max_busywait_duration_ns),
  1940. P(preempt_timeout_ms),
  1941. P(stop_timeout_ms),
  1942. P(timeslice_duration_ms),
  1943. {},
  1944. #undef P
  1945. };
  1946. const struct pmap *p;
  1947. drm_printf(m, "\tProperties:\n");
  1948. for (p = props; p->name; p++)
  1949. drm_printf(m, "\t\t%s: %lu [default %lu]\n",
  1950. p->name,
  1951. read_ul(&engine->props, p->offset),
  1952. read_ul(&engine->defaults, p->offset));
  1953. }
  1954. static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg)
  1955. {
  1956. struct intel_timeline *tl = get_timeline(rq);
  1957. i915_request_show(m, rq, msg, 0);
  1958. drm_printf(m, "\t\tring->start: 0x%08x\n",
  1959. i915_ggtt_offset(rq->ring->vma));
  1960. drm_printf(m, "\t\tring->head: 0x%08x\n",
  1961. rq->ring->head);
  1962. drm_printf(m, "\t\tring->tail: 0x%08x\n",
  1963. rq->ring->tail);
  1964. drm_printf(m, "\t\tring->emit: 0x%08x\n",
  1965. rq->ring->emit);
  1966. drm_printf(m, "\t\tring->space: 0x%08x\n",
  1967. rq->ring->space);
  1968. if (tl) {
  1969. drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
  1970. tl->hwsp_offset);
  1971. intel_timeline_put(tl);
  1972. }
  1973. print_request_ring(m, rq);
  1974. if (rq->context->lrc_reg_state) {
  1975. drm_printf(m, "Logical Ring Context:\n");
  1976. hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
  1977. }
  1978. }
  1979. void intel_engine_dump_active_requests(struct list_head *requests,
  1980. struct i915_request *hung_rq,
  1981. struct drm_printer *m)
  1982. {
  1983. struct i915_request *rq;
  1984. const char *msg;
  1985. enum i915_request_state state;
  1986. list_for_each_entry(rq, requests, sched.link) {
  1987. if (rq == hung_rq)
  1988. continue;
  1989. state = i915_test_request_state(rq);
  1990. if (state < I915_REQUEST_QUEUED)
  1991. continue;
  1992. if (state == I915_REQUEST_ACTIVE)
  1993. msg = "\t\tactive on engine";
  1994. else
  1995. msg = "\t\tactive in queue";
  1996. engine_dump_request(rq, m, msg);
  1997. }
  1998. }
  1999. static void engine_dump_active_requests(struct intel_engine_cs *engine,
  2000. struct drm_printer *m)
  2001. {
  2002. struct intel_context *hung_ce = NULL;
  2003. struct i915_request *hung_rq = NULL;
  2004. /*
  2005. * No need for an engine->irq_seqno_barrier() before the seqno reads.
  2006. * The GPU is still running so requests are still executing and any
  2007. * hardware reads will be out of date by the time they are reported.
  2008. * But the intention here is just to report an instantaneous snapshot
  2009. * so that's fine.
  2010. */
  2011. intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
  2012. drm_printf(m, "\tRequests:\n");
  2013. if (hung_rq)
  2014. engine_dump_request(hung_rq, m, "\t\thung");
  2015. else if (hung_ce)
  2016. drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
  2017. if (intel_uc_uses_guc_submission(&engine->gt->uc))
  2018. intel_guc_dump_active_requests(engine, hung_rq, m);
  2019. else
  2020. intel_execlists_dump_active_requests(engine, hung_rq, m);
  2021. if (hung_rq)
  2022. i915_request_put(hung_rq);
  2023. }
  2024. void intel_engine_dump(struct intel_engine_cs *engine,
  2025. struct drm_printer *m,
  2026. const char *header, ...)
  2027. {
  2028. struct i915_gpu_error * const error = &engine->i915->gpu_error;
  2029. struct i915_request *rq;
  2030. intel_wakeref_t wakeref;
  2031. ktime_t dummy;
  2032. if (header) {
  2033. va_list ap;
  2034. va_start(ap, header);
  2035. drm_vprintf(m, header, &ap);
  2036. va_end(ap);
  2037. }
  2038. if (intel_gt_is_wedged(engine->gt))
  2039. drm_printf(m, "*** WEDGED ***\n");
  2040. drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
  2041. drm_printf(m, "\tBarriers?: %s\n",
  2042. str_yes_no(!llist_empty(&engine->barrier_tasks)));
  2043. drm_printf(m, "\tLatency: %luus\n",
  2044. ewma__engine_latency_read(&engine->latency));
  2045. if (intel_engine_supports_stats(engine))
  2046. drm_printf(m, "\tRuntime: %llums\n",
  2047. ktime_to_ms(intel_engine_get_busy_time(engine,
  2048. &dummy)));
  2049. drm_printf(m, "\tForcewake: %x domains, %d active\n",
  2050. engine->fw_domain, READ_ONCE(engine->fw_active));
  2051. rcu_read_lock();
  2052. rq = READ_ONCE(engine->heartbeat.systole);
  2053. if (rq)
  2054. drm_printf(m, "\tHeartbeat: %d ms ago\n",
  2055. jiffies_to_msecs(jiffies - rq->emitted_jiffies));
  2056. rcu_read_unlock();
  2057. drm_printf(m, "\tReset count: %d (global %d)\n",
  2058. i915_reset_engine_count(error, engine),
  2059. i915_reset_count(error));
  2060. print_properties(engine, m);
  2061. engine_dump_active_requests(engine, m);
  2062. drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
  2063. wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
  2064. if (wakeref) {
  2065. intel_engine_print_registers(engine, m);
  2066. intel_runtime_pm_put(engine->uncore->rpm, wakeref);
  2067. } else {
  2068. drm_printf(m, "\tDevice is asleep; skipping register dump\n");
  2069. }
  2070. intel_execlists_show_requests(engine, m, i915_request_show, 8);
  2071. drm_printf(m, "HWSP:\n");
  2072. hexdump(m, engine->status_page.addr, PAGE_SIZE);
  2073. drm_printf(m, "Idle? %s\n", str_yes_no(intel_engine_is_idle(engine)));
  2074. intel_engine_print_breadcrumbs(engine, m);
  2075. }
  2076. /**
  2077. * intel_engine_get_busy_time() - Return current accumulated engine busyness
  2078. * @engine: engine to report on
  2079. * @now: monotonic timestamp of sampling
  2080. *
  2081. * Returns accumulated time @engine was busy since engine stats were enabled.
  2082. */
  2083. ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
  2084. {
  2085. return engine->busyness(engine, now);
  2086. }
  2087. struct intel_context *
  2088. intel_engine_create_virtual(struct intel_engine_cs **siblings,
  2089. unsigned int count, unsigned long flags)
  2090. {
  2091. if (count == 0)
  2092. return ERR_PTR(-EINVAL);
  2093. if (count == 1 && !(flags & FORCE_VIRTUAL))
  2094. return intel_context_create(siblings[0]);
  2095. GEM_BUG_ON(!siblings[0]->cops->create_virtual);
  2096. return siblings[0]->cops->create_virtual(siblings, count, flags);
  2097. }
  2098. static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
  2099. {
  2100. struct i915_request *request, *active = NULL;
  2101. /*
  2102. * This search does not work in GuC submission mode. However, the GuC
  2103. * will report the hanging context directly to the driver itself. So
  2104. * the driver should never get here when in GuC mode.
  2105. */
  2106. GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc));
  2107. /*
  2108. * We are called by the error capture, reset and to dump engine
  2109. * state at random points in time. In particular, note that neither is
  2110. * crucially ordered with an interrupt. After a hang, the GPU is dead
  2111. * and we assume that no more writes can happen (we waited long enough
  2112. * for all writes that were in transaction to be flushed) - adding an
  2113. * extra delay for a recent interrupt is pointless. Hence, we do
  2114. * not need an engine->irq_seqno_barrier() before the seqno reads.
  2115. * At all other times, we must assume the GPU is still running, but
  2116. * we only care about the snapshot of this moment.
  2117. */
  2118. lockdep_assert_held(&engine->sched_engine->lock);
  2119. rcu_read_lock();
  2120. request = execlists_active(&engine->execlists);
  2121. if (request) {
  2122. struct intel_timeline *tl = request->context->timeline;
  2123. list_for_each_entry_from_reverse(request, &tl->requests, link) {
  2124. if (__i915_request_is_complete(request))
  2125. break;
  2126. active = request;
  2127. }
  2128. }
  2129. rcu_read_unlock();
  2130. if (active)
  2131. return active;
  2132. list_for_each_entry(request, &engine->sched_engine->requests,
  2133. sched.link) {
  2134. if (i915_test_request_state(request) != I915_REQUEST_ACTIVE)
  2135. continue;
  2136. active = request;
  2137. break;
  2138. }
  2139. return active;
  2140. }
  2141. void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
  2142. struct intel_context **ce, struct i915_request **rq)
  2143. {
  2144. unsigned long flags;
  2145. *ce = intel_engine_get_hung_context(engine);
  2146. if (*ce) {
  2147. intel_engine_clear_hung_context(engine);
  2148. *rq = intel_context_get_active_request(*ce);
  2149. return;
  2150. }
  2151. /*
  2152. * Getting here with GuC enabled means it is a forced error capture
  2153. * with no actual hang. So, no need to attempt the execlist search.
  2154. */
  2155. if (intel_uc_uses_guc_submission(&engine->gt->uc))
  2156. return;
  2157. spin_lock_irqsave(&engine->sched_engine->lock, flags);
  2158. *rq = engine_execlist_find_hung_request(engine);
  2159. if (*rq)
  2160. *rq = i915_request_get_rcu(*rq);
  2161. spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
  2162. }
  2163. void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
  2164. {
  2165. /*
  2166. * If there are any non-fused-off CCS engines, we need to enable CCS
  2167. * support in the RCU_MODE register. This only needs to be done once,
  2168. * so for simplicity we'll take care of this in the RCS engine's
  2169. * resume handler; since the RCS and all CCS engines belong to the
  2170. * same reset domain and are reset together, this will also take care
  2171. * of re-applying the setting after i915-triggered resets.
  2172. */
  2173. if (!CCS_MASK(engine->gt))
  2174. return;
  2175. intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
  2176. _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
  2177. }
  2178. #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
  2179. #include "mock_engine.c"
  2180. #include "selftest_engine.c"
  2181. #include "selftest_engine_cs.c"
  2182. #endif