a5xx_gpu.c 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  3. */
  4. #include <linux/kernel.h>
  5. #include <linux/types.h>
  6. #include <linux/cpumask.h>
  7. #include <linux/firmware/qcom/qcom_scm.h>
  8. #include <linux/pm_opp.h>
  9. #include <linux/nvmem-consumer.h>
  10. #include <linux/slab.h>
  11. #include "msm_gem.h"
  12. #include "msm_mmu.h"
  13. #include "a5xx_gpu.h"
  14. extern bool hang_debug;
  15. static void a5xx_dump(struct msm_gpu *gpu);
  16. #define GPU_PAS_ID 13
  17. static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  18. {
  19. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  20. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  21. if (a5xx_gpu->has_whereami) {
  22. OUT_PKT7(ring, CP_WHERE_AM_I, 2);
  23. OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
  24. OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
  25. }
  26. }
  27. void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
  28. bool sync)
  29. {
  30. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  31. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  32. uint32_t wptr;
  33. unsigned long flags;
  34. /*
  35. * Most flush operations need to issue a WHERE_AM_I opcode to sync up
  36. * the rptr shadow
  37. */
  38. if (sync)
  39. update_shadow_rptr(gpu, ring);
  40. spin_lock_irqsave(&ring->preempt_lock, flags);
  41. /* Copy the shadow to the actual register */
  42. ring->cur = ring->next;
  43. /* Make sure to wrap wptr if we need to */
  44. wptr = get_wptr(ring);
  45. spin_unlock_irqrestore(&ring->preempt_lock, flags);
  46. /* Make sure everything is posted before making a decision */
  47. mb();
  48. /* Update HW if this is the current ring and we are not in preempt */
  49. if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  50. gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  51. }
  52. static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
  53. {
  54. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  55. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  56. struct msm_ringbuffer *ring = submit->ring;
  57. struct drm_gem_object *obj;
  58. uint32_t *ptr, dwords;
  59. unsigned int i;
  60. for (i = 0; i < submit->nr_cmds; i++) {
  61. switch (submit->cmd[i].type) {
  62. case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  63. break;
  64. case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  65. if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
  66. break;
  67. fallthrough;
  68. case MSM_SUBMIT_CMD_BUF:
  69. /* copy commands into RB: */
  70. obj = submit->bos[submit->cmd[i].idx].obj;
  71. dwords = submit->cmd[i].size;
  72. ptr = msm_gem_get_vaddr(obj);
  73. /* _get_vaddr() shouldn't fail at this point,
  74. * since we've already mapped it once in
  75. * submit_reloc()
  76. */
  77. if (WARN_ON(IS_ERR_OR_NULL(ptr)))
  78. return;
  79. for (i = 0; i < dwords; i++) {
  80. /* normally the OUT_PKTn() would wait
  81. * for space for the packet. But since
  82. * we just OUT_RING() the whole thing,
  83. * need to call adreno_wait_ring()
  84. * ourself:
  85. */
  86. adreno_wait_ring(ring, 1);
  87. OUT_RING(ring, ptr[i]);
  88. }
  89. msm_gem_put_vaddr(obj);
  90. break;
  91. }
  92. }
  93. a5xx_gpu->last_seqno[ring->id] = submit->seqno;
  94. a5xx_flush(gpu, ring, true);
  95. a5xx_preempt_trigger(gpu);
  96. /* we might not necessarily have a cmd from userspace to
  97. * trigger an event to know that submit has completed, so
  98. * do this manually:
  99. */
  100. a5xx_idle(gpu, ring);
  101. ring->memptrs->fence = submit->seqno;
  102. msm_gpu_retire(gpu);
  103. }
  104. static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
  105. {
  106. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  107. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  108. struct msm_ringbuffer *ring = submit->ring;
  109. unsigned int i, ibs = 0;
  110. adreno_check_and_reenable_stall(adreno_gpu);
  111. if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
  112. ring->cur_ctx_seqno = 0;
  113. a5xx_submit_in_rb(gpu, submit);
  114. return;
  115. }
  116. OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
  117. OUT_RING(ring, 0x02);
  118. /* Turn off protected mode to write to special registers */
  119. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  120. OUT_RING(ring, 0);
  121. /* Set the save preemption record for the ring/command */
  122. OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
  123. OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
  124. OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
  125. /* Turn back on protected mode */
  126. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  127. OUT_RING(ring, 1);
  128. /*
  129. * Disable local preemption by default because it requires
  130. * user-space to be aware of it and provide additional handling
  131. * to restore rendering state or do various flushes on switch.
  132. */
  133. OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
  134. OUT_RING(ring, 0x0);
  135. /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
  136. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  137. OUT_RING(ring, 0x02);
  138. /* Submit the commands */
  139. for (i = 0; i < submit->nr_cmds; i++) {
  140. switch (submit->cmd[i].type) {
  141. case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  142. break;
  143. case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  144. if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
  145. break;
  146. fallthrough;
  147. case MSM_SUBMIT_CMD_BUF:
  148. OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
  149. OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
  150. OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
  151. OUT_RING(ring, submit->cmd[i].size);
  152. ibs++;
  153. break;
  154. }
  155. /*
  156. * Periodically update shadow-wptr if needed, so that we
  157. * can see partial progress of submits with large # of
  158. * cmds.. otherwise we could needlessly stall waiting for
  159. * ringbuffer state, simply due to looking at a shadow
  160. * rptr value that has not been updated
  161. */
  162. if ((ibs % 32) == 0)
  163. update_shadow_rptr(gpu, ring);
  164. }
  165. /*
  166. * Write the render mode to NULL (0) to indicate to the CP that the IBs
  167. * are done rendering - otherwise a lucky preemption would start
  168. * replaying from the last checkpoint
  169. */
  170. OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
  171. OUT_RING(ring, 0);
  172. OUT_RING(ring, 0);
  173. OUT_RING(ring, 0);
  174. OUT_RING(ring, 0);
  175. OUT_RING(ring, 0);
  176. /* Turn off IB level preemptions */
  177. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  178. OUT_RING(ring, 0x01);
  179. /* Write the fence to the scratch register */
  180. OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
  181. OUT_RING(ring, submit->seqno);
  182. a5xx_gpu->last_seqno[ring->id] = submit->seqno;
  183. /*
  184. * Execute a CACHE_FLUSH_TS event. This will ensure that the
  185. * timestamp is written to the memory and then triggers the interrupt
  186. */
  187. OUT_PKT7(ring, CP_EVENT_WRITE, 4);
  188. OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
  189. CP_EVENT_WRITE_0_IRQ);
  190. OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
  191. OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
  192. OUT_RING(ring, submit->seqno);
  193. /* Yield the floor on command completion */
  194. OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
  195. /*
  196. * If dword[2:1] are non zero, they specify an address for the CP to
  197. * write the value of dword[3] to on preemption complete. Write 0 to
  198. * skip the write
  199. */
  200. OUT_RING(ring, 0x00);
  201. OUT_RING(ring, 0x00);
  202. /* Data value - not used if the address above is 0 */
  203. OUT_RING(ring, 0x01);
  204. /* Set bit 0 to trigger an interrupt on preempt complete */
  205. OUT_RING(ring, 0x01);
  206. /* A WHERE_AM_I packet is not needed after a YIELD */
  207. a5xx_flush(gpu, ring, false);
  208. /* Check to see if we need to start preemption */
  209. a5xx_preempt_trigger(gpu);
  210. }
  211. static const struct adreno_five_hwcg_regs {
  212. u32 offset;
  213. u32 value;
  214. } a5xx_hwcg[] = {
  215. {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
  216. {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
  217. {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
  218. {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
  219. {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
  220. {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
  221. {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
  222. {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
  223. {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
  224. {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
  225. {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
  226. {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
  227. {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
  228. {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
  229. {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
  230. {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
  231. {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
  232. {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
  233. {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
  234. {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
  235. {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
  236. {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
  237. {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
  238. {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
  239. {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
  240. {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
  241. {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
  242. {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
  243. {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
  244. {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
  245. {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
  246. {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
  247. {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
  248. {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
  249. {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
  250. {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
  251. {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
  252. {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
  253. {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
  254. {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
  255. {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
  256. {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
  257. {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
  258. {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
  259. {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
  260. {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
  261. {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
  262. {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
  263. {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
  264. {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
  265. {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
  266. {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
  267. {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
  268. {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
  269. {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
  270. {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
  271. {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
  272. {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
  273. {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
  274. {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
  275. {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
  276. {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
  277. {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
  278. {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
  279. {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
  280. {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
  281. {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
  282. {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
  283. {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
  284. {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
  285. {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
  286. {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
  287. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
  288. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
  289. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
  290. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
  291. {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
  292. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
  293. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
  294. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
  295. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
  296. {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
  297. {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
  298. {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
  299. {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
  300. {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
  301. {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
  302. {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
  303. {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
  304. {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
  305. {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
  306. {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
  307. }, a50x_hwcg[] = {
  308. {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
  309. {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
  310. {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
  311. {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
  312. {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
  313. {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
  314. {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
  315. {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
  316. {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
  317. {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
  318. {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
  319. {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
  320. {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
  321. {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
  322. {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
  323. {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
  324. {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
  325. {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
  326. {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
  327. {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
  328. {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
  329. {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
  330. {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
  331. {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
  332. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
  333. {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
  334. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
  335. {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
  336. {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
  337. {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
  338. {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
  339. {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
  340. {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
  341. {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
  342. {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
  343. {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
  344. {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
  345. {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
  346. }, a512_hwcg[] = {
  347. {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
  348. {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
  349. {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
  350. {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
  351. {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
  352. {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
  353. {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
  354. {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
  355. {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
  356. {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
  357. {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
  358. {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
  359. {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
  360. {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
  361. {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
  362. {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
  363. {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
  364. {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
  365. {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
  366. {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
  367. {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
  368. {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
  369. {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
  370. {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
  371. {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
  372. {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
  373. {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
  374. {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
  375. {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
  376. {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
  377. {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
  378. {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
  379. {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
  380. {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
  381. {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
  382. {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
  383. {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
  384. {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
  385. {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
  386. {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
  387. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
  388. {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
  389. {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
  390. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
  391. {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
  392. {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
  393. {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
  394. {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
  395. {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
  396. {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
  397. {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
  398. {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
  399. {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
  400. {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
  401. {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
  402. {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
  403. };
  404. void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
  405. {
  406. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  407. const struct adreno_five_hwcg_regs *regs;
  408. unsigned int i, sz;
  409. if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
  410. adreno_is_a508(adreno_gpu)) {
  411. regs = a50x_hwcg;
  412. sz = ARRAY_SIZE(a50x_hwcg);
  413. } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
  414. regs = a512_hwcg;
  415. sz = ARRAY_SIZE(a512_hwcg);
  416. } else {
  417. regs = a5xx_hwcg;
  418. sz = ARRAY_SIZE(a5xx_hwcg);
  419. }
  420. for (i = 0; i < sz; i++)
  421. gpu_write(gpu, regs[i].offset,
  422. state ? regs[i].value : 0);
  423. if (adreno_is_a540(adreno_gpu)) {
  424. gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
  425. gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
  426. }
  427. gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
  428. gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
  429. }
  430. static int a5xx_me_init(struct msm_gpu *gpu)
  431. {
  432. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  433. struct msm_ringbuffer *ring = gpu->rb[0];
  434. OUT_PKT7(ring, CP_ME_INIT, 8);
  435. OUT_RING(ring, 0x0000002F);
  436. /* Enable multiple hardware contexts */
  437. OUT_RING(ring, 0x00000003);
  438. /* Enable error detection */
  439. OUT_RING(ring, 0x20000000);
  440. /* Don't enable header dump */
  441. OUT_RING(ring, 0x00000000);
  442. OUT_RING(ring, 0x00000000);
  443. /* Specify workarounds for various microcode issues */
  444. if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
  445. adreno_is_a530(adreno_gpu)) {
  446. /* Workaround for token end syncs
  447. * Force a WFI after every direct-render 3D mode draw and every
  448. * 2D mode 3 draw
  449. */
  450. OUT_RING(ring, 0x0000000B);
  451. } else if (adreno_is_a510(adreno_gpu)) {
  452. /* Workaround for token and syncs */
  453. OUT_RING(ring, 0x00000001);
  454. } else {
  455. /* No workarounds enabled */
  456. OUT_RING(ring, 0x00000000);
  457. }
  458. OUT_RING(ring, 0x00000000);
  459. OUT_RING(ring, 0x00000000);
  460. a5xx_flush(gpu, ring, true);
  461. return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  462. }
  463. static int a5xx_preempt_start(struct msm_gpu *gpu)
  464. {
  465. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  466. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  467. struct msm_ringbuffer *ring = gpu->rb[0];
  468. if (gpu->nr_rings == 1)
  469. return 0;
  470. /* Turn off protected mode to write to special registers */
  471. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  472. OUT_RING(ring, 0);
  473. /* Set the save preemption record for the ring/command */
  474. OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
  475. OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
  476. OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
  477. /* Turn back on protected mode */
  478. OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
  479. OUT_RING(ring, 1);
  480. OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
  481. OUT_RING(ring, 0x00);
  482. OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
  483. OUT_RING(ring, 0x01);
  484. OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
  485. OUT_RING(ring, 0x01);
  486. /* Yield the floor on command completion */
  487. OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
  488. OUT_RING(ring, 0x00);
  489. OUT_RING(ring, 0x00);
  490. OUT_RING(ring, 0x01);
  491. OUT_RING(ring, 0x01);
  492. /* The WHERE_AMI_I packet is not needed after a YIELD is issued */
  493. a5xx_flush(gpu, ring, false);
  494. return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
  495. }
  496. static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
  497. struct drm_gem_object *obj)
  498. {
  499. u32 *buf = msm_gem_get_vaddr(obj);
  500. if (IS_ERR(buf))
  501. return;
  502. /*
  503. * If the lowest nibble is 0xa that is an indication that this microcode
  504. * has been patched. The actual version is in dword [3] but we only care
  505. * about the patchlevel which is the lowest nibble of dword [3]
  506. */
  507. if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
  508. a5xx_gpu->has_whereami = true;
  509. msm_gem_put_vaddr(obj);
  510. }
  511. static int a5xx_ucode_load(struct msm_gpu *gpu)
  512. {
  513. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  514. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  515. int ret;
  516. if (!a5xx_gpu->pm4_bo) {
  517. a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
  518. adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
  519. if (IS_ERR(a5xx_gpu->pm4_bo)) {
  520. ret = PTR_ERR(a5xx_gpu->pm4_bo);
  521. a5xx_gpu->pm4_bo = NULL;
  522. DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
  523. ret);
  524. return ret;
  525. }
  526. msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
  527. }
  528. if (!a5xx_gpu->pfp_bo) {
  529. a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
  530. adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
  531. if (IS_ERR(a5xx_gpu->pfp_bo)) {
  532. ret = PTR_ERR(a5xx_gpu->pfp_bo);
  533. a5xx_gpu->pfp_bo = NULL;
  534. DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
  535. ret);
  536. return ret;
  537. }
  538. msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
  539. a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
  540. }
  541. if (a5xx_gpu->has_whereami) {
  542. if (!a5xx_gpu->shadow_bo) {
  543. a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
  544. sizeof(u32) * gpu->nr_rings,
  545. MSM_BO_WC | MSM_BO_MAP_PRIV,
  546. gpu->vm, &a5xx_gpu->shadow_bo,
  547. &a5xx_gpu->shadow_iova);
  548. if (IS_ERR(a5xx_gpu->shadow))
  549. return PTR_ERR(a5xx_gpu->shadow);
  550. msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow");
  551. }
  552. } else if (gpu->nr_rings > 1) {
  553. /* Disable preemption if WHERE_AM_I isn't available */
  554. a5xx_preempt_fini(gpu);
  555. gpu->nr_rings = 1;
  556. }
  557. return 0;
  558. }
  559. #define SCM_GPU_ZAP_SHADER_RESUME 0
  560. static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
  561. {
  562. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  563. int ret;
  564. /*
  565. * Adreno 506 have CPZ Retention feature and doesn't require
  566. * to resume zap shader
  567. */
  568. if (adreno_is_a506(adreno_gpu))
  569. return 0;
  570. ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
  571. if (ret)
  572. DRM_ERROR("%s: zap-shader resume failed: %d\n",
  573. gpu->name, ret);
  574. return ret;
  575. }
  576. static int a5xx_zap_shader_init(struct msm_gpu *gpu)
  577. {
  578. static bool loaded;
  579. int ret;
  580. /*
  581. * If the zap shader is already loaded into memory we just need to kick
  582. * the remote processor to reinitialize it
  583. */
  584. if (loaded)
  585. return a5xx_zap_shader_resume(gpu);
  586. ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
  587. loaded = !ret;
  588. return ret;
  589. }
  590. #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
  591. A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
  592. A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
  593. A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
  594. A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
  595. A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
  596. A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
  597. A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
  598. A5XX_RBBM_INT_0_MASK_CP_SW | \
  599. A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
  600. A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
  601. A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
/*
 * Bring the GPU hardware into a usable state.
 *
 * The sequence is: program the static VBIF/RBBM/UCHE/CP configuration
 * (with per-SKU variations), set up the CP register protection ranges,
 * switch all blocks to 64 bit addressing, run the common adreno_hw_init(),
 * point the CP at the microcode and ringbuffer, start the micro engine,
 * run a5xx_me_init() and a5xx_power_init(), and finally leave secure mode
 * either through the zap shader or by writing SECVID_TRUST_CNTL directly.
 *
 * Returns 0 on success. On failure returns the error from
 * adreno_hw_init(), a5xx_me_init(), a5xx_power_init() or
 * a5xx_zap_shader_init() (other than -ENODEV), or -EINVAL if the GPU does
 * not go idle after one of the ringbuffer submissions made here.
 */
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	u32 hbb;
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
	    adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));

	/*
	 * Set the GMEM VA range: it starts at 0x00100000 and spans
	 * info->gmem bytes (the MAX register holds the last valid address)
	 */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->info->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	/* CP queue thresholds and MERCIU size differ per SKU */
	if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
	    adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
		if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
			adreno_is_a508(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		else
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
	} else {
		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
		if (adreno_is_a530(adreno_gpu))
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
		else
			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
	}

	/* PC_DBG_ECO_CNTL is also programmed with a per-SKU value */
	if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
	    adreno_is_a508(adreno_gpu))
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x100 << 11 | 0x100 << 22));
	else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
		 adreno_is_a512(adreno_gpu))
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x200 << 11 | 0x200 << 22));
	else
		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
			(0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/*
	 * Disable the RB sampler datapath DP2 clock gating optimization
	 * for 1-SP GPUs, as it is enabled by default.
	 */
	if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
	    adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
	    adreno_is_a512(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));

	/* Disable UCHE global filter as SP can invalidate/flush independently */
	gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/*
	 *  In A5x, CCU can send context_done event of a particular context to
	 *  UCHE which ultimately reaches CP even when there is valid
	 *  transaction of that context inside CCU. This can let CP to program
	 *  config registers, which will make the "valid transaction" inside
	 *  CCU to be interpreted differently. This can cause gpu fault. This
	 *  bug is fixed in latest A510 revision. To enable this bug fix -
	 *  bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
	 *  (disable). For older A510 version this bit is unused.
	 */
	if (adreno_is_a510(adreno_gpu))
		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* The registers below take the highest bank bit as an offset from 13 */
	BUG_ON(adreno_gpu->ubwc_config->highest_bank_bit < 13);
	hbb = adreno_gpu->ubwc_config->highest_bank_bit - 13;

	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb << 1);

	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
	    adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb);

	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	/* SMMU */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* Only A530 and A540 get the GPMU microcode initialised here */
	if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
		a5xx_gpmu_ucode_init(gpu);

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
	 * can't safely use the RPTR shadow or preemption. In either case, the
	 * RPTR shadow should be disabled in hardware.
	 */
	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Configure the RPTR shadow if needed: */
	if (a5xx_gpu->shadow_bo) {
		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			    shadowptr(a5xx_gpu, gpu->rb[0]));
	}

	a5xx_preempt_hw_init(gpu);

	/*
	 * Disable the interrupts through the initial bringup stage
	 *
	 * NOTE(review): the value written is A5XX_INT_MASK, i.e. the full set
	 * of sources the driver handles; that reads more like selecting the
	 * interrupts than disabling them - confirm the register semantics.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * If the chip that we are using does support loading one, then
	 * try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a5xx_flush(gpu, gpu->rb[0], true);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	} else {
		return ret;
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}
  867. static void a5xx_recover(struct msm_gpu *gpu)
  868. {
  869. int i;
  870. adreno_dump_info(gpu);
  871. for (i = 0; i < 8; i++) {
  872. printk("CP_SCRATCH_REG%d: %u\n", i,
  873. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
  874. }
  875. if (hang_debug)
  876. a5xx_dump(gpu);
  877. gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
  878. gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
  879. gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
  880. adreno_recover(gpu);
  881. }
  882. static void a5xx_destroy(struct msm_gpu *gpu)
  883. {
  884. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  885. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  886. DBG("%s", gpu->name);
  887. a5xx_preempt_fini(gpu);
  888. if (a5xx_gpu->pm4_bo) {
  889. msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->vm);
  890. drm_gem_object_put(a5xx_gpu->pm4_bo);
  891. }
  892. if (a5xx_gpu->pfp_bo) {
  893. msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->vm);
  894. drm_gem_object_put(a5xx_gpu->pfp_bo);
  895. }
  896. if (a5xx_gpu->gpmu_bo) {
  897. msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->vm);
  898. drm_gem_object_put(a5xx_gpu->gpmu_bo);
  899. }
  900. if (a5xx_gpu->shadow_bo) {
  901. msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->vm);
  902. drm_gem_object_put(a5xx_gpu->shadow_bo);
  903. }
  904. adreno_gpu_cleanup(adreno_gpu);
  905. kfree(a5xx_gpu);
  906. }
  907. static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
  908. {
  909. if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
  910. return false;
  911. /*
  912. * Nearly every abnormality ends up pausing the GPU and triggering a
  913. * fault so we can safely just watch for this one interrupt to fire
  914. */
  915. return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
  916. A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
  917. }
  918. bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  919. {
  920. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  921. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  922. if (ring != a5xx_gpu->cur_ring) {
  923. WARN(1, "Tried to idle a non-current ringbuffer\n");
  924. return false;
  925. }
  926. /* wait for CP to drain ringbuffer: */
  927. if (!adreno_idle(gpu, ring))
  928. return false;
  929. if (spin_until(_a5xx_check_idle(gpu))) {
  930. DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  931. gpu->name, __builtin_return_address(0),
  932. gpu_read(gpu, REG_A5XX_RBBM_STATUS),
  933. gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
  934. gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
  935. gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
  936. return false;
  937. }
  938. return true;
  939. }
  940. static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
  941. {
  942. struct msm_gpu *gpu = arg;
  943. struct adreno_smmu_fault_info *info = data;
  944. char block[12] = "unknown";
  945. u32 scratch[] = {
  946. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
  947. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
  948. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
  949. gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)),
  950. };
  951. if (info)
  952. snprintf(block, sizeof(block), "%x", info->fsynr1);
  953. return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
  954. }
  955. static void a5xx_cp_err_irq(struct msm_gpu *gpu)
  956. {
  957. u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
  958. if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
  959. u32 val;
  960. gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
  961. /*
  962. * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
  963. * read it twice
  964. */
  965. gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
  966. val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
  967. dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
  968. val);
  969. }
  970. if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
  971. dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
  972. gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
  973. if (status & A5XX_CP_INT_CP_DMA_ERROR)
  974. dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
  975. if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
  976. u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
  977. dev_err_ratelimited(gpu->dev->dev,
  978. "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
  979. val & (1 << 24) ? "WRITE" : "READ",
  980. (val & 0xFFFFF) >> 2, val);
  981. }
  982. if (status & A5XX_CP_INT_CP_AHB_ERROR) {
  983. u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
  984. const char *access[16] = { "reserved", "reserved",
  985. "timestamp lo", "timestamp hi", "pfp read", "pfp write",
  986. "", "", "me read", "me write", "", "", "crashdump read",
  987. "crashdump write" };
  988. dev_err_ratelimited(gpu->dev->dev,
  989. "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
  990. status & 0xFFFFF, access[(status >> 24) & 0xF],
  991. (status & (1 << 31)), status);
  992. }
  993. }
  994. static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
  995. {
  996. if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
  997. u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
  998. dev_err_ratelimited(gpu->dev->dev,
  999. "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
  1000. val & (1 << 28) ? "WRITE" : "READ",
  1001. (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
  1002. (val >> 24) & 0xF);
  1003. /* Clear the error */
  1004. gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
  1005. /* Clear the interrupt */
  1006. gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
  1007. A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
  1008. }
  1009. if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
  1010. dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
  1011. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
  1012. dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
  1013. gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
  1014. if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
  1015. dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
  1016. gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
  1017. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
  1018. dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
  1019. gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
  1020. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
  1021. dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
  1022. if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
  1023. dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
  1024. }
  1025. static void a5xx_uche_err_irq(struct msm_gpu *gpu)
  1026. {
  1027. uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
  1028. addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
  1029. dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
  1030. addr);
  1031. }
  1032. static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
  1033. {
  1034. dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
  1035. }
  1036. static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
  1037. {
  1038. struct drm_device *dev = gpu->dev;
  1039. struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
  1040. /*
  1041. * If stalled on SMMU fault, we could trip the GPU's hang detection,
  1042. * but the fault handler will trigger the devcore dump, and we want
  1043. * to otherwise resume normally rather than killing the submit, so
  1044. * just bail.
  1045. */
  1046. if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
  1047. return;
  1048. DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
  1049. ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
  1050. gpu_read(gpu, REG_A5XX_RBBM_STATUS),
  1051. gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
  1052. gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
  1053. gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
  1054. gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
  1055. gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
  1056. gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
  1057. /* Turn off the hangcheck timer to keep it from bothering us */
  1058. timer_delete(&gpu->hangcheck_timer);
  1059. kthread_queue_work(gpu->worker, &gpu->recover_work);
  1060. }
/*
 * Interrupt bits that make a5xx_irq() call a5xx_rbbm_err_irq(): the AHB
 * error, the transfer/master-split timeouts and the ATB async overflow.
 */
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
  1068. static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
  1069. {
  1070. struct msm_drm_private *priv = gpu->dev->dev_private;
  1071. u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
  1072. /*
  1073. * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
  1074. * before the source is cleared the interrupt will storm.
  1075. */
  1076. gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
  1077. status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
  1078. if (priv->disable_err_irq) {
  1079. status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
  1080. A5XX_RBBM_INT_0_MASK_CP_SW;
  1081. }
  1082. /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
  1083. if (status & RBBM_ERROR_MASK)
  1084. a5xx_rbbm_err_irq(gpu, status);
  1085. if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
  1086. a5xx_cp_err_irq(gpu);
  1087. if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
  1088. a5xx_fault_detect_irq(gpu);
  1089. if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
  1090. a5xx_uche_err_irq(gpu);
  1091. if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
  1092. a5xx_gpmu_err_irq(gpu);
  1093. if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
  1094. a5xx_preempt_trigger(gpu);
  1095. msm_gpu_retire(gpu);
  1096. }
  1097. if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
  1098. a5xx_preempt_irq(gpu);
  1099. return IRQ_HANDLED;
  1100. }
/*
 * Register offsets for the a5xx register dump, terminated by ~0.
 *
 * NOTE(review): the values look like (first, last) inclusive range pairs
 * (several pairs repeat the same offset) - confirm against the code that
 * consumes this table.
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
  1131. static void a5xx_dump(struct msm_gpu *gpu)
  1132. {
  1133. DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
  1134. gpu_read(gpu, REG_A5XX_RBBM_STATUS));
  1135. adreno_dump(gpu);
  1136. }
  1137. static int a5xx_pm_resume(struct msm_gpu *gpu)
  1138. {
  1139. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  1140. int ret;
  1141. /* Turn on the core power */
  1142. ret = msm_gpu_pm_resume(gpu);
  1143. if (ret)
  1144. return ret;
  1145. /* Adreno 505, 506, 508, 509, 510, 512 needs manual RBBM sus/res control */
  1146. if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
  1147. /* Halt the sp_input_clk at HM level */
  1148. gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
  1149. a5xx_set_hwcg(gpu, true);
  1150. /* Turn on sp_input_clk at HM level */
  1151. gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
  1152. return 0;
  1153. }
  1154. /* Turn the RBCCU domain first to limit the chances of voltage droop */
  1155. gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
  1156. /* Wait 3 usecs before polling */
  1157. udelay(3);
  1158. ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
  1159. (1 << 20), (1 << 20));
  1160. if (ret) {
  1161. DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
  1162. gpu->name,
  1163. gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
  1164. return ret;
  1165. }
  1166. /* Turn on the SP domain */
  1167. gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
  1168. ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
  1169. (1 << 20), (1 << 20));
  1170. if (ret)
  1171. DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
  1172. gpu->name);
  1173. return ret;
  1174. }
  1175. static int a5xx_pm_suspend(struct msm_gpu *gpu)
  1176. {
  1177. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  1178. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  1179. u32 mask = 0xf;
  1180. int i, ret;
  1181. /* A505, A506, A508, A510 have 3 XIN ports in VBIF */
  1182. if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
  1183. adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
  1184. mask = 0x7;
  1185. /* Clear the VBIF pipe before shutting down */
  1186. gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
  1187. spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
  1188. mask) == mask);
  1189. gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
  1190. /*
  1191. * Reset the VBIF before power collapse to avoid issue with FIFO
  1192. * entries on Adreno A510 and A530 (the others will tend to lock up)
  1193. */
  1194. if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
  1195. gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
  1196. gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
  1197. }
  1198. ret = msm_gpu_pm_suspend(gpu);
  1199. if (ret)
  1200. return ret;
  1201. if (a5xx_gpu->has_whereami)
  1202. for (i = 0; i < gpu->nr_rings; i++)
  1203. a5xx_gpu->shadow[i] = 0;
  1204. return 0;
  1205. }
  1206. static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
  1207. {
  1208. *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
  1209. return 0;
  1210. }
/* Buffer used to hold a CP crashdump script and the data it captures */
struct a5xx_crashdumper {
	void *ptr;			/* kernel mapping of the buffer */
	struct drm_gem_object *bo;	/* backing GEM object */
	u64 iova;			/* GPU address of the buffer */
};
/* a5xx GPU state snapshot: the generic state plus the HLSQ aperture dump */
struct a5xx_gpu_state {
	struct msm_gpu_state base;	/* generic state, handed out to callers */
	u32 *hlsqregs;			/* kcalloc'ed HLSQ/SP/TP register values, or NULL */
};
  1220. static int a5xx_crashdumper_init(struct msm_gpu *gpu,
  1221. struct a5xx_crashdumper *dumper)
  1222. {
  1223. dumper->ptr = msm_gem_kernel_new(gpu->dev,
  1224. SZ_1M, MSM_BO_WC, gpu->vm,
  1225. &dumper->bo, &dumper->iova);
  1226. if (!IS_ERR(dumper->ptr))
  1227. msm_gem_object_set_name(dumper->bo, "crashdump");
  1228. return PTR_ERR_OR_ZERO(dumper->ptr);
  1229. }
  1230. static int a5xx_crashdumper_run(struct msm_gpu *gpu,
  1231. struct a5xx_crashdumper *dumper)
  1232. {
  1233. u32 val;
  1234. if (IS_ERR_OR_NULL(dumper->ptr))
  1235. return -EINVAL;
  1236. gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
  1237. gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
  1238. return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
  1239. val & 0x04, 100, 10000);
  1240. }
/*
 * These are a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper.  These are not nominally accessible from
 * the CPU on a secure platform.
 *
 * For each entry, 'type' is the bank selector written (shifted left by 8)
 * to HLSQ_DBG_READ_SEL and 'count' is the number of 32 bit registers read
 * back for that bank; 'regoffset' is not used by the script builder in
 * a5xx_gpu_state_get_hlsq_regs().
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
  1267. static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
  1268. struct a5xx_gpu_state *a5xx_state)
  1269. {
  1270. struct a5xx_crashdumper dumper = { 0 };
  1271. u32 offset, count = 0;
  1272. u64 *ptr;
  1273. int i;
  1274. if (a5xx_crashdumper_init(gpu, &dumper))
  1275. return;
  1276. /* The script will be written at offset 0 */
  1277. ptr = dumper.ptr;
  1278. /* Start writing the data at offset 256k */
  1279. offset = dumper.iova + (256 * SZ_1K);
  1280. /* Count how many additional registers to get from the HLSQ aperture */
  1281. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
  1282. count += a5xx_hlsq_aperture_regs[i].count;
  1283. a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
  1284. if (!a5xx_state->hlsqregs)
  1285. return;
  1286. /* Build the crashdump script */
  1287. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
  1288. u32 type = a5xx_hlsq_aperture_regs[i].type;
  1289. u32 c = a5xx_hlsq_aperture_regs[i].count;
  1290. /* Write the register to select the desired bank */
  1291. *ptr++ = ((u64) type << 8);
  1292. *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
  1293. (1 << 21) | 1;
  1294. *ptr++ = offset;
  1295. *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
  1296. | c;
  1297. offset += c * sizeof(u32);
  1298. }
  1299. /* Write two zeros to close off the script */
  1300. *ptr++ = 0;
  1301. *ptr++ = 0;
  1302. if (a5xx_crashdumper_run(gpu, &dumper)) {
  1303. kfree(a5xx_state->hlsqregs);
  1304. msm_gem_kernel_put(dumper.bo, gpu->vm);
  1305. return;
  1306. }
  1307. /* Copy the data from the crashdumper to the state */
  1308. memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
  1309. count * sizeof(u32));
  1310. msm_gem_kernel_put(dumper.bo, gpu->vm);
  1311. }
  1312. static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
  1313. {
  1314. struct a5xx_gpu_state *a5xx_state = kzalloc_obj(*a5xx_state);
  1315. bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
  1316. if (!a5xx_state)
  1317. return ERR_PTR(-ENOMEM);
  1318. /* Temporarily disable hardware clock gating before reading the hw */
  1319. a5xx_set_hwcg(gpu, false);
  1320. /* First get the generic state from the adreno core */
  1321. adreno_gpu_state_get(gpu, &(a5xx_state->base));
  1322. a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
  1323. /*
  1324. * Get the HLSQ regs with the help of the crashdumper, but only if
  1325. * we are not stalled in an iommu fault (in which case the crashdumper
  1326. * would not have access to memory)
  1327. */
  1328. if (!stalled)
  1329. a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
  1330. a5xx_set_hwcg(gpu, true);
  1331. return &a5xx_state->base;
  1332. }
  1333. static void a5xx_gpu_state_destroy(struct kref *kref)
  1334. {
  1335. struct msm_gpu_state *state = container_of(kref,
  1336. struct msm_gpu_state, ref);
  1337. struct a5xx_gpu_state *a5xx_state = container_of(state,
  1338. struct a5xx_gpu_state, base);
  1339. kfree(a5xx_state->hlsqregs);
  1340. adreno_gpu_state_destroy(state);
  1341. kfree(a5xx_state);
  1342. }
  1343. static int a5xx_gpu_state_put(struct msm_gpu_state *state)
  1344. {
  1345. if (IS_ERR_OR_NULL(state))
  1346. return 1;
  1347. return kref_put(&state->ref, a5xx_gpu_state_destroy);
  1348. }
  1349. #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
  1350. static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
  1351. struct drm_printer *p)
  1352. {
  1353. int i, j;
  1354. u32 pos = 0;
  1355. struct a5xx_gpu_state *a5xx_state = container_of(state,
  1356. struct a5xx_gpu_state, base);
  1357. if (IS_ERR_OR_NULL(state))
  1358. return;
  1359. adreno_show(gpu, state, p);
  1360. /* Dump the additional a5xx HLSQ registers */
  1361. if (!a5xx_state->hlsqregs)
  1362. return;
  1363. drm_printf(p, "registers-hlsq:\n");
  1364. for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
  1365. u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
  1366. u32 c = a5xx_hlsq_aperture_regs[i].count;
  1367. for (j = 0; j < c; j++, pos++, o++) {
  1368. /*
  1369. * To keep the crashdump simple we pull the entire range
  1370. * for each register type but not all of the registers
  1371. * in the range are valid. Fortunately invalid registers
  1372. * stick out like a sore thumb with a value of
  1373. * 0xdeadbeef
  1374. */
  1375. if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
  1376. continue;
  1377. drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
  1378. o << 2, a5xx_state->hlsqregs[pos]);
  1379. }
  1380. }
  1381. }
  1382. #endif
  1383. static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
  1384. {
  1385. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  1386. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  1387. return a5xx_gpu->cur_ring;
  1388. }
  1389. static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
  1390. {
  1391. u64 busy_cycles;
  1392. busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
  1393. *out_sample_rate = clk_get_rate(gpu->core_clk);
  1394. return busy_cycles;
  1395. }
  1396. static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  1397. {
  1398. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  1399. struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  1400. if (a5xx_gpu->has_whereami)
  1401. return a5xx_gpu->shadow[ring->id];
  1402. return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
  1403. }
  1404. static void check_speed_bin(struct device *dev)
  1405. {
  1406. struct nvmem_cell *cell;
  1407. u32 val;
  1408. /*
  1409. * If the OPP table specifies a opp-supported-hw property then we have
  1410. * to set something with dev_pm_opp_set_supported_hw() or the table
  1411. * doesn't get populated so pick an arbitrary value that should
  1412. * ensure the default frequencies are selected but not conflict with any
  1413. * actual bins
  1414. */
  1415. val = 0x80;
  1416. cell = nvmem_cell_get(dev, "speed_bin");
  1417. if (!IS_ERR(cell)) {
  1418. void *buf = nvmem_cell_read(cell, NULL);
  1419. if (!IS_ERR(buf)) {
  1420. u8 bin = *((u8 *) buf);
  1421. val = (1 << bin);
  1422. kfree(buf);
  1423. }
  1424. nvmem_cell_put(cell);
  1425. }
  1426. devm_pm_opp_set_supported_hw(dev, &val, 1);
  1427. }
  1428. static struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
  1429. {
  1430. struct msm_drm_private *priv = dev->dev_private;
  1431. struct platform_device *pdev = priv->gpu_pdev;
  1432. struct adreno_platform_config *config = pdev->dev.platform_data;
  1433. const struct qcom_ubwc_cfg_data *common_cfg;
  1434. struct a5xx_gpu *a5xx_gpu = NULL;
  1435. struct adreno_gpu *adreno_gpu;
  1436. struct msm_gpu *gpu;
  1437. unsigned int nr_rings;
  1438. int ret;
  1439. a5xx_gpu = kzalloc_obj(*a5xx_gpu);
  1440. if (!a5xx_gpu)
  1441. return ERR_PTR(-ENOMEM);
  1442. adreno_gpu = &a5xx_gpu->base;
  1443. gpu = &adreno_gpu->base;
  1444. adreno_gpu->registers = a5xx_registers;
  1445. a5xx_gpu->lm_leakage = 0x4E001A;
  1446. check_speed_bin(&pdev->dev);
  1447. nr_rings = 4;
  1448. if (config->info->revn == 510)
  1449. nr_rings = 1;
  1450. ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings);
  1451. if (ret) {
  1452. a5xx_destroy(&(a5xx_gpu->base.base));
  1453. return ERR_PTR(ret);
  1454. }
  1455. msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
  1456. a5xx_fault_handler);
  1457. /* Set up the preemption specific bits and pieces for each ringbuffer */
  1458. a5xx_preempt_init(gpu);
  1459. /* Inherit the common config and make some necessary fixups */
  1460. common_cfg = qcom_ubwc_config_get_data();
  1461. if (IS_ERR(common_cfg))
  1462. return ERR_CAST(common_cfg);
  1463. /* Copy the data into the internal struct to drop the const qualifier (temporarily) */
  1464. adreno_gpu->_ubwc_config = *common_cfg;
  1465. adreno_gpu->ubwc_config = &adreno_gpu->_ubwc_config;
  1466. adreno_gpu->uche_trap_base = 0x0001ffffffff0000ull;
  1467. return gpu;
  1468. }
  1469. const struct adreno_gpu_funcs a5xx_gpu_funcs = {
  1470. .base = {
  1471. .get_param = adreno_get_param,
  1472. .set_param = adreno_set_param,
  1473. .hw_init = a5xx_hw_init,
  1474. .ucode_load = a5xx_ucode_load,
  1475. .pm_suspend = a5xx_pm_suspend,
  1476. .pm_resume = a5xx_pm_resume,
  1477. .recover = a5xx_recover,
  1478. .submit = a5xx_submit,
  1479. .active_ring = a5xx_active_ring,
  1480. .irq = a5xx_irq,
  1481. .destroy = a5xx_destroy,
  1482. #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
  1483. .show = a5xx_show,
  1484. #endif
  1485. #if defined(CONFIG_DEBUG_FS)
  1486. .debugfs_init = a5xx_debugfs_init,
  1487. #endif
  1488. .gpu_busy = a5xx_gpu_busy,
  1489. .gpu_state_get = a5xx_gpu_state_get,
  1490. .gpu_state_put = a5xx_gpu_state_put,
  1491. .create_vm = adreno_create_vm,
  1492. .get_rptr = a5xx_get_rptr,
  1493. },
  1494. .init = a5xx_gpu_init,
  1495. .get_timestamp = a5xx_get_timestamp,
  1496. };