a8xx_gpu.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
  3. #include "msm_gem.h"
  4. #include "msm_mmu.h"
  5. #include "msm_gpu_trace.h"
  6. #include "a6xx_gpu.h"
  7. #include "a6xx_gmu.xml.h"
  8. #include <linux/bitfield.h>
  9. #include <linux/devfreq.h>
  10. #include <linux/firmware/qcom/qcom_scm.h>
  11. #include <linux/pm_domain.h>
  12. #include <linux/soc/qcom/llcc-qcom.h>
  13. #define GPU_PAS_ID 13
  14. static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
  15. {
  16. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  17. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  18. u32 val;
  19. val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) | A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);
  20. if (a6xx_gpu->cached_aperture == val)
  21. return;
  22. gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);
  23. a6xx_gpu->cached_aperture = val;
  24. }
  25. static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
  26. {
  27. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  28. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  29. spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);
  30. a8xx_aperture_slice_set(gpu, pipe, 0);
  31. }
  32. static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
  33. {
  34. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  35. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  36. spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
  37. }
  38. static void a8xx_aperture_clear(struct msm_gpu *gpu)
  39. {
  40. unsigned long flags;
  41. a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
  42. a8xx_aperture_release(gpu, flags);
  43. }
  44. static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
  45. {
  46. unsigned long flags;
  47. a8xx_aperture_acquire(gpu, pipe, &flags);
  48. gpu_write(gpu, offset, data);
  49. a8xx_aperture_release(gpu, flags);
  50. }
  51. static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
  52. {
  53. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  54. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  55. unsigned long flags;
  56. u32 val;
  57. spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);
  58. a8xx_aperture_slice_set(gpu, pipe, slice);
  59. val = gpu_read(gpu, offset);
  60. spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
  61. return val;
  62. }
  63. void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
  64. {
  65. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  66. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  67. const struct a6xx_info *info = adreno_gpu->info->a6xx;
  68. u32 slice_mask;
  69. if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
  70. return;
  71. if (a6xx_gpu->slice_mask)
  72. return;
  73. slice_mask = GENMASK(info->max_slices - 1, 0);
  74. /* GEN1 doesn't support partial slice configurations */
  75. if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
  76. a6xx_gpu->slice_mask = slice_mask;
  77. return;
  78. }
  79. slice_mask &= a6xx_llc_read(a6xx_gpu,
  80. REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);
  81. a6xx_gpu->slice_mask = slice_mask;
  82. /* Chip ID depends on the number of slices available. So update it */
  83. adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
  84. }
  85. static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
  86. {
  87. return ffs(a6xx_gpu->slice_mask) - 1;
  88. }
  89. static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
  90. {
  91. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  92. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  93. /* Check that the GMU is idle */
  94. if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
  95. return false;
  96. /* Check that the CX master is idle */
  97. if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
  98. ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
  99. return false;
  100. return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
  101. A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
  102. }
  103. static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  104. {
  105. /* wait for CP to drain ringbuffer: */
  106. if (!adreno_idle(gpu, ring))
  107. return false;
  108. if (spin_until(_a8xx_check_idle(gpu))) {
  109. DRM_ERROR(
  110. "%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  111. gpu->name, __builtin_return_address(0),
  112. gpu_read(gpu, REG_A8XX_RBBM_STATUS),
  113. gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
  114. gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
  115. gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
  116. return false;
  117. }
  118. return true;
  119. }
  120. void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  121. {
  122. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  123. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  124. uint32_t wptr;
  125. unsigned long flags;
  126. spin_lock_irqsave(&ring->preempt_lock, flags);
  127. /* Copy the shadow to the actual register */
  128. ring->cur = ring->next;
  129. /* Make sure to wrap wptr if we need to */
  130. wptr = get_wptr(ring);
  131. /* Update HW if this is the current ring and we are not in preempt*/
  132. if (!a6xx_in_preempt(a6xx_gpu)) {
  133. if (a6xx_gpu->cur_ring == ring)
  134. gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
  135. else
  136. ring->restore_wptr = true;
  137. } else {
  138. ring->restore_wptr = true;
  139. }
  140. spin_unlock_irqrestore(&ring->preempt_lock, flags);
  141. }
  142. static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
  143. {
  144. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  145. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  146. struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
  147. u32 val;
  148. if (adreno_is_x285(adreno_gpu) && state)
  149. gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);
  150. gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
  151. state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
  152. gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
  153. state ? 0x110111 : 0);
  154. gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
  155. state ? 0x55555 : 0);
  156. gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
  157. gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);
  158. if (state) {
  159. gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);
  160. if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
  161. val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
  162. dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
  163. return;
  164. }
  165. gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
  166. } else {
  167. /*
  168. * GMU enables clk gating in GBIF during boot up. So,
  169. * override that here when hwcg feature is disabled
  170. */
  171. gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
  172. }
  173. }
  174. static void a8xx_set_cp_protect(struct msm_gpu *gpu)
  175. {
  176. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  177. const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
  178. u32 cntl, final_cfg;
  179. unsigned int i;
  180. cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
  181. A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
  182. A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
  183. A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;
  184. /*
  185. * Enable access protection to privileged registers, fault on an access
  186. * protect violation and select the last span to protect from the start
  187. * address all the way to the end of the register address space
  188. */
  189. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
  190. a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
  191. a8xx_aperture_clear(gpu);
  192. for (i = 0; i < protect->count; i++) {
  193. /* Intentionally skip writing to some registers */
  194. if (protect->regs[i]) {
  195. gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
  196. final_cfg = protect->regs[i];
  197. }
  198. }
  199. /*
  200. * Last span feature is only supported on PIPE specific register.
  201. * So update those here
  202. */
  203. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
  204. a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
  205. a8xx_aperture_clear(gpu);
  206. }
  207. static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
  208. {
  209. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  210. const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
  211. u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
  212. u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
  213. bool rgba8888_lossless = false, fp16compoptdis = false;
  214. bool yuvnotcomptofc = false, min_acc_len_64b = false;
  215. bool rgb565_predicator = false, amsbc = false;
  216. bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
  217. u32 ubwc_version = cfg->ubwc_enc_version;
  218. u32 hbb, hbb_hi, hbb_lo, mode = 1;
  219. u8 uavflagprd_inv = 2;
  220. switch (ubwc_version) {
  221. case UBWC_6_0:
  222. yuvnotcomptofc = true;
  223. mode = 5;
  224. break;
  225. case UBWC_5_0:
  226. amsbc = true;
  227. rgb565_predicator = true;
  228. mode = 4;
  229. break;
  230. case UBWC_4_0:
  231. amsbc = true;
  232. rgb565_predicator = true;
  233. fp16compoptdis = true;
  234. rgba8888_lossless = true;
  235. mode = 2;
  236. break;
  237. case UBWC_3_0:
  238. amsbc = true;
  239. mode = 1;
  240. break;
  241. default:
  242. dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
  243. break;
  244. }
  245. /*
  246. * We subtract 13 from the highest bank bit (13 is the minimum value
  247. * allowed by hw) and write the lowest two bits of the remaining value
  248. * as hbb_lo and the one above it as hbb_hi to the hardware.
  249. */
  250. WARN_ON(cfg->highest_bank_bit < 13);
  251. hbb = cfg->highest_bank_bit - 13;
  252. hbb_hi = hbb >> 2;
  253. hbb_lo = hbb & 3;
  254. a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL,
  255. hbb << 5 |
  256. level3_swizzling_dis << 4 |
  257. level2_swizzling_dis << 3);
  258. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL,
  259. hbb << 5 |
  260. level3_swizzling_dis << 4 |
  261. level2_swizzling_dis << 3);
  262. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
  263. yuvnotcomptofc << 6 |
  264. level3_swizzling_dis << 5 |
  265. level2_swizzling_dis << 4 |
  266. hbb_hi << 3 |
  267. hbb_lo << 1);
  268. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
  269. mode << 15 |
  270. yuvnotcomptofc << 6 |
  271. rgba8888_lossless << 4 |
  272. fp16compoptdis << 3 |
  273. rgb565_predicator << 2 |
  274. amsbc << 1 |
  275. min_acc_len_64b);
  276. a8xx_aperture_clear(gpu);
  277. gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
  278. level3_swizzling_dis << 13 |
  279. level2_swizzling_dis << 12 |
  280. hbb_hi << 10 |
  281. uavflagprd_inv << 4 |
  282. min_acc_len_64b << 3 |
  283. hbb_lo << 1 | ubwc_mode);
  284. gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
  285. level3_swizzling_dis << 7 |
  286. level2_swizzling_dis << 6 |
  287. hbb_hi << 4 |
  288. min_acc_len_64b << 3 |
  289. hbb_lo << 1 | ubwc_mode);
  290. }
  291. static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
  292. {
  293. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  294. const struct a6xx_info *info = adreno_gpu->info->a6xx;
  295. const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
  296. unsigned int pipe_id, i;
  297. unsigned long flags;
  298. for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
  299. /* We don't have support for LPAC yet */
  300. if (pipe_id == PIPE_LPAC)
  301. continue;
  302. a8xx_aperture_acquire(gpu, pipe_id, &flags);
  303. for (i = 0; regs[i].offset; i++) {
  304. if (!(BIT(pipe_id) & regs[i].pipe))
  305. continue;
  306. if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
  307. *gmem_protect = regs[i].value;
  308. gpu_write(gpu, regs[i].offset, regs[i].value);
  309. }
  310. a8xx_aperture_release(gpu, flags);
  311. }
  312. a8xx_aperture_clear(gpu);
  313. }
  314. static int a8xx_cp_init(struct msm_gpu *gpu)
  315. {
  316. struct msm_ringbuffer *ring = gpu->rb[0];
  317. u32 mask;
  318. /* Disable concurrent binning before sending CP init */
  319. OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
  320. OUT_RING(ring, BIT(27));
  321. OUT_PKT7(ring, CP_ME_INIT, 4);
  322. /* Use multiple HW contexts */
  323. mask = BIT(0);
  324. /* Enable error detection */
  325. mask |= BIT(1);
  326. /* Set default reset state */
  327. mask |= BIT(3);
  328. /* Disable save/restore of performance counters across preemption */
  329. mask |= BIT(6);
  330. OUT_RING(ring, mask);
  331. /* Enable multiple hardware contexts */
  332. OUT_RING(ring, 0x00000003);
  333. /* Enable error detection */
  334. OUT_RING(ring, 0x20000000);
  335. /* Operation mode mask */
  336. OUT_RING(ring, 0x00000002);
  337. a6xx_flush(gpu, ring);
  338. return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
  339. }
  /* RBBM interrupt sources unmasked via REG_A8XX_RBBM_INT_0_MASK in hw_init() */
  #define A8XX_INT_MASK \
  	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
  	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
  	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
  	 A6XX_RBBM_INT_0_MASK_CP_SW | \
  	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
  	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
  	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
  	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
  	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
  	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
  	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
  	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
  	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
  	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

  /* CP_APRIV_CNTL_PIPE bits written for every pipe other than BR */
  #define A8XX_APRIV_MASK \
  	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
  	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
  	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
  	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

  /* The BR pipe gets the common bits plus CDREAD/CDWRITE */
  #define A8XX_BR_APRIV_MASK \
  	(A8XX_APRIV_MASK | \
  	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
  	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

  /* Written to REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL in hw_init() */
  #define A8XX_CP_GLOBAL_INT_MASK \
  	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
  	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
  	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

  /* Written per pipe to REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE in hw_init() */
  #define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
  	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
  	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

  /* Written per pipe to REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE in hw_init() */
  #define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
  	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
  	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
  411. static int hw_init(struct msm_gpu *gpu)
  412. {
  413. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  414. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  415. struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
  416. unsigned int pipe_id, i;
  417. u32 gmem_protect = 0;
  418. u64 gmem_range_min;
  419. int ret;
  420. ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
  421. if (ret)
  422. return ret;
  423. /* Clear the cached value to force aperture configuration next time */
  424. a6xx_gpu->cached_aperture = UINT_MAX;
  425. a8xx_aperture_clear(gpu);
  426. /* Clear GBIF halt in case GX domain was not collapsed */
  427. gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
  428. gpu_read(gpu, REG_A6XX_GBIF_HALT);
  429. gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
  430. gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);
  431. gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
  432. /*
  433. * Disable the trusted memory range - we don't actually supported secure
  434. * memory rendering at this point in time and we don't want to block off
  435. * part of the virtual memory space.
  436. */
  437. gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
  438. gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
  439. /* Make all blocks contribute to the GPU BUSY perf counter */
  440. gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
  441. /* Setup GMEM Range in UCHE */
  442. gmem_range_min = SZ_64M;
  443. /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
  444. gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
  445. gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);
  446. /* Setup UCHE Trap region */
  447. gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
  448. gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
  449. gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
  450. gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
  451. /* Turn on performance counters */
  452. gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
  453. gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);
  454. /* Turn on the IFPC counter (countable 4 on XOCLK1) */
  455. gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
  456. FIELD_PREP(GENMASK(7, 0), 0x4));
  457. /* Select CP0 to always count cycles */
  458. gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);
  459. a8xx_set_ubwc_config(gpu);
  460. /* Set weights for bicubic filtering */
  461. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
  462. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
  463. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
  464. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
  465. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
  466. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
  467. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
  468. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
  469. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
  470. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
  471. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
  472. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
  473. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
  474. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
  475. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
  476. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
  477. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
  478. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
  479. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
  480. gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);
  481. gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
  482. a8xx_nonctxt_config(gpu, &gmem_protect);
  483. /* Enable fault detection */
  484. gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
  485. gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));
  486. /* Set up the CX GMU counter 0 to count busy ticks */
  487. gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
  488. /* Enable the power counter */
  489. gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
  490. gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
  491. /* Protect registers from the CP */
  492. a8xx_set_cp_protect(gpu);
  493. /* Enable the GMEM save/restore feature for preemption */
  494. a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);
  495. for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
  496. u32 apriv_mask = A8XX_APRIV_MASK;
  497. unsigned long flags;
  498. if (pipe_id == PIPE_LPAC)
  499. continue;
  500. if (pipe_id == PIPE_BR)
  501. apriv_mask = A8XX_BR_APRIV_MASK;
  502. a8xx_aperture_acquire(gpu, pipe_id, &flags);
  503. gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
  504. gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
  505. A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
  506. gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
  507. A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
  508. a8xx_aperture_release(gpu, flags);
  509. }
  510. a8xx_aperture_clear(gpu);
  511. /* Enable interrupts */
  512. gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
  513. gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);
  514. ret = adreno_hw_init(gpu);
  515. if (ret)
  516. goto out;
  517. gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
  518. if (a6xx_gpu->aqe_iova)
  519. gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);
  520. /* Set the ringbuffer address */
  521. gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
  522. gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
  523. /* Configure the RPTR shadow if needed: */
  524. gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
  525. gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));
  526. for (i = 0; i < gpu->nr_rings; i++)
  527. a6xx_gpu->shadow[i] = 0;
  528. /* Always come up on rb 0 */
  529. a6xx_gpu->cur_ring = gpu->rb[0];
  530. for (i = 0; i < gpu->nr_rings; i++)
  531. gpu->rb[i]->cur_ctx_seqno = 0;
  532. /* Enable the SQE_to start the CP engine */
  533. gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);
  534. ret = a8xx_cp_init(gpu);
  535. if (ret)
  536. goto out;
  537. /*
  538. * Try to load a zap shader into the secure world. If successful
  539. * we can use the CP to switch out of secure mode. If not then we
  540. * have no resource but to try to switch ourselves out manually. If we
  541. * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
  542. * be blocked and a permissions violation will soon follow.
  543. */
  544. ret = a6xx_zap_shader_init(gpu);
  545. if (!ret) {
  546. OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
  547. OUT_RING(gpu->rb[0], 0x00000000);
  548. a6xx_flush(gpu, gpu->rb[0]);
  549. if (!a8xx_idle(gpu, gpu->rb[0]))
  550. return -EINVAL;
  551. } else if (ret == -ENODEV) {
  552. /*
  553. * This device does not use zap shader (but print a warning
  554. * just in case someone got their dt wrong.. hopefully they
  555. * have a debug UART to realize the error of their ways...
  556. * if you mess this up you are about to crash horribly)
  557. */
  558. dev_warn_once(gpu->dev->dev,
  559. "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
  560. gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
  561. ret = 0;
  562. } else {
  563. return ret;
  564. }
  565. /*
  566. * GMEM_PROTECT register should be programmed after GPU is transitioned to
  567. * non-secure mode
  568. */
  569. a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
  570. WARN_ON(!gmem_protect);
  571. a8xx_aperture_clear(gpu);
  572. /* Enable hardware clockgating */
  573. a8xx_set_hwcg(gpu, true);
  574. out:
  575. /*
  576. * Tell the GMU that we are done touching the GPU and it can start power
  577. * management
  578. */
  579. a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
  580. return ret;
  581. }
  582. int a8xx_hw_init(struct msm_gpu *gpu)
  583. {
  584. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  585. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  586. int ret;
  587. mutex_lock(&a6xx_gpu->gmu.lock);
  588. ret = hw_init(gpu);
  589. mutex_unlock(&a6xx_gpu->gmu.lock);
  590. return ret;
  591. }
  592. static void a8xx_dump(struct msm_gpu *gpu)
  593. {
  594. DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n", gpu_read(gpu, REG_A8XX_RBBM_STATUS));
  595. adreno_dump(gpu);
  596. }
  597. void a8xx_recover(struct msm_gpu *gpu)
  598. {
  599. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  600. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  601. struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
  602. int active_submits;
  603. adreno_dump_info(gpu);
  604. if (hang_debug)
  605. a8xx_dump(gpu);
  606. /*
  607. * To handle recovery specific sequences during the rpm suspend we are
  608. * about to trigger
  609. */
  610. a6xx_gpu->hung = true;
  611. /* Halt SQE first */
  612. gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);
  613. pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
  614. /* active_submit won't change until we make a submission */
  615. mutex_lock(&gpu->active_lock);
  616. active_submits = gpu->active_submits;
  617. /*
  618. * Temporarily clear active_submits count to silence a WARN() in the
  619. * runtime suspend cb
  620. */
  621. gpu->active_submits = 0;
  622. reinit_completion(&gmu->pd_gate);
  623. dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
  624. dev_pm_genpd_synced_poweroff(gmu->cxpd);
  625. /* Drop the rpm refcount from active submits */
  626. if (active_submits)
  627. pm_runtime_put(&gpu->pdev->dev);
  628. /* And the final one from recover worker */
  629. pm_runtime_put_sync(&gpu->pdev->dev);
  630. if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
  631. DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
  632. dev_pm_genpd_remove_notifier(gmu->cxpd);
  633. pm_runtime_use_autosuspend(&gpu->pdev->dev);
  634. if (active_submits)
  635. pm_runtime_get(&gpu->pdev->dev);
  636. pm_runtime_get_sync(&gpu->pdev->dev);
  637. gpu->active_submits = active_submits;
  638. mutex_unlock(&gpu->active_lock);
  639. msm_gpu_hw_init(gpu);
  640. a6xx_gpu->hung = false;
  641. }
  642. static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
  643. {
  644. static const char * const uche_clients[] = {
  645. "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
  646. "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
  647. "STCHE",
  648. };
  649. static const char * const uche_clients_lpac[] = {
  650. "-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
  651. };
  652. u32 val;
  653. /*
  654. * The source of the data depends on the mid ID read from FSYNR1.
  655. * and the client ID read from the UCHE block
  656. */
  657. val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);
  658. val &= GENMASK(6, 0);
  659. /* mid=3 refers to BR or BV */
  660. if (mid == 3) {
  661. if (val < ARRAY_SIZE(uche_clients))
  662. return uche_clients[val];
  663. else
  664. return "UCHE";
  665. }
  666. /* mid=8 refers to LPAC */
  667. if (mid == 8) {
  668. if (val < ARRAY_SIZE(uche_clients_lpac))
  669. return uche_clients_lpac[val];
  670. else
  671. return "UCHE_LPAC";
  672. }
  673. return "Unknown";
  674. }
  675. static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
  676. {
  677. switch (id) {
  678. case 0x0:
  679. return "CP";
  680. case 0x1:
  681. return "UCHE: Unknown";
  682. case 0x2:
  683. return "UCHE_LPAC: Unknown";
  684. case 0x3:
  685. case 0x8:
  686. return a8xx_uche_fault_block(gpu, id);
  687. case 0x4:
  688. return "CCU";
  689. case 0x5:
  690. return "Flag cache";
  691. case 0x6:
  692. return "PREFETCH";
  693. case 0x7:
  694. return "GMU";
  695. case 0x9:
  696. return "UCHE_HPAC";
  697. }
  698. return "Unknown";
  699. }
  700. int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
  701. {
  702. struct msm_gpu *gpu = arg;
  703. struct adreno_smmu_fault_info *info = data;
  704. const char *block = "unknown";
  705. u32 scratch[] = {
  706. gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
  707. gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
  708. gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
  709. gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
  710. };
  711. if (info)
  712. block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);
  713. return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
  714. }
  715. static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
  716. {
  717. u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
  718. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  719. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  720. u32 slice = a8xx_get_first_slice(a6xx_gpu);
  721. u32 hw_fault_mask = GENMASK(6, 0);
  722. u32 sw_fault_mask = GENMASK(22, 16);
  723. u32 pipe = 0;
  724. dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);
  725. if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR |
  726. A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
  727. pipe |= BIT(PIPE_BR);
  728. if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV |
  729. A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
  730. pipe |= BIT(PIPE_BV);
  731. if (!pipe) {
  732. dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
  733. goto out;
  734. }
  735. for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
  736. if (!(BIT(pipe_id) & pipe))
  737. continue;
  738. if (hw_fault_mask & status) {
  739. status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
  740. REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
  741. dev_err_ratelimited(&gpu->pdev->dev,
  742. "CP HW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
  743. }
  744. if (sw_fault_mask & status) {
  745. status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
  746. REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
  747. dev_err_ratelimited(&gpu->pdev->dev,
  748. "CP SW FAULT pipe: %u status: 0x%x\n", pipe_id, status);
  749. if (status & BIT(8)) {
  750. a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
  751. status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
  752. REG_A8XX_CP_SQE_STAT_DATA_PIPE);
  753. dev_err_ratelimited(&gpu->pdev->dev,
  754. "CP Opcode error, opcode=0x%x\n", status);
  755. }
  756. if (status & BIT(10)) {
  757. status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
  758. REG_A8XX_CP_PROTECT_STATUS_PIPE);
  759. dev_err_ratelimited(&gpu->pdev->dev,
  760. "CP REG PROTECT error, status=0x%x\n", status);
  761. }
  762. }
  763. }
  764. out:
  765. /* Turn off interrupts to avoid triggering recovery again */
  766. a8xx_aperture_clear(gpu);
  767. gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
  768. gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);
  769. kthread_queue_work(gpu->worker, &gpu->recover_work);
  770. }
  771. static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
  772. {
  773. gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);
  774. return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
  775. }
  776. static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
  777. {
  778. u64 lo, hi;
  779. lo = gpu_periph_read(gpu, dbg_offset);
  780. hi = gpu_periph_read(gpu, dbg_offset + 1);
  781. return (hi << 32) | lo;
  782. }
  /*
   * Debug-window offsets of the CP indirect buffer (IB1..IB3) state, for use
   * with gpu_periph_read() / gpu_periph_read64(). Each IB occupies four
   * consecutive words: base low, base high, size, offset.
   */
  #define CP_PERIPH_IB1_BASE_LO	0x7005
  #define CP_PERIPH_IB1_BASE_HI	0x7006
  #define CP_PERIPH_IB1_SIZE	0x7007
  #define CP_PERIPH_IB1_OFFSET	0x7008

  #define CP_PERIPH_IB2_BASE_LO	0x7009
  #define CP_PERIPH_IB2_BASE_HI	0x700a
  #define CP_PERIPH_IB2_SIZE	0x700b
  #define CP_PERIPH_IB2_OFFSET	0x700c

  #define CP_PERIPH_IB3_BASE_LO	0x700d
  #define CP_PERIPH_IB3_BASE_HI	0x700e
  #define CP_PERIPH_IB3_SIZE	0x700f
  #define CP_PERIPH_IB3_OFFSET	0x7010
  795. static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
  796. {
  797. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  798. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  799. struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
  800. unsigned long flags;
  801. /*
  802. * If stalled on SMMU fault, we could trip the GPU's hang detection,
  803. * but the fault handler will trigger the devcore dump, and we want
  804. * to otherwise resume normally rather than killing the submit, so
  805. * just bail.
  806. */
  807. if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) & A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
  808. return;
  809. /*
  810. * Force the GPU to stay on until after we finish
  811. * collecting information
  812. */
  813. if (!adreno_has_gmu_wrapper(adreno_gpu))
  814. gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
  815. DRM_DEV_ERROR(&gpu->pdev->dev,
  816. "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
  817. ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
  818. gpu_read(gpu, REG_A8XX_RBBM_STATUS), gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));
  819. a8xx_aperture_acquire(gpu, PIPE_BR, &flags);
  820. DRM_DEV_ERROR(&gpu->pdev->dev,
  821. "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
  822. gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
  823. gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
  824. gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
  825. gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
  826. gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
  827. gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
  828. gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
  829. gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
  830. gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
  831. a8xx_aperture_release(gpu, flags);
  832. a8xx_aperture_acquire(gpu, PIPE_BV, &flags);
  833. DRM_DEV_ERROR(&gpu->pdev->dev,
  834. "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
  835. gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
  836. gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
  837. gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
  838. gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
  839. gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
  840. gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
  841. gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
  842. gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
  843. gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));
  844. a8xx_aperture_release(gpu, flags);
  845. a8xx_aperture_clear(gpu);
  846. /* Turn off the hangcheck timer to keep it from bothering us */
  847. timer_delete(&gpu->hangcheck_timer);
  848. kthread_queue_work(gpu->worker, &gpu->recover_work);
  849. }
  850. static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
  851. {
  852. u32 status;
  853. status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
  854. gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);
  855. dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
  856. /*
  857. * Ignore FASTBLEND violations, because the HW will silently fall back
  858. * to legacy blending.
  859. */
  860. if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
  861. A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
  862. timer_delete(&gpu->hangcheck_timer);
  863. kthread_queue_work(gpu->worker, &gpu->recover_work);
  864. }
  865. }
  866. irqreturn_t a8xx_irq(struct msm_gpu *gpu)
  867. {
  868. struct msm_drm_private *priv = gpu->dev->dev_private;
  869. u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);
  870. gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);
  871. if (priv->disable_err_irq)
  872. status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
  873. if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
  874. a8xx_fault_detect_irq(gpu);
  875. if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
  876. u32 rl0, rl1;
  877. rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
  878. rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
  879. dev_err_ratelimited(&gpu->pdev->dev,
  880. "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n", rl0, rl1);
  881. }
  882. if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
  883. a8xx_cp_hw_err_irq(gpu);
  884. if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
  885. dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
  886. if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
  887. dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
  888. if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
  889. dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
  890. if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
  891. dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");
  892. if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
  893. a8xx_sw_fuse_violation_irq(gpu);
  894. if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
  895. msm_gpu_retire(gpu);
  896. a6xx_preempt_trigger(gpu);
  897. }
  898. if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
  899. a6xx_preempt_irq(gpu);
  900. return IRQ_HANDLED;
  901. }
  902. void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
  903. {
  904. struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
  905. struct msm_gpu *gpu = &adreno_gpu->base;
  906. if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
  907. u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
  908. gpu_scid &= GENMASK(5, 0);
  909. gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
  910. FIELD_PREP(GENMASK(29, 24), gpu_scid) |
  911. FIELD_PREP(GENMASK(23, 18), gpu_scid) |
  912. FIELD_PREP(GENMASK(17, 12), gpu_scid) |
  913. FIELD_PREP(GENMASK(11, 6), gpu_scid) |
  914. FIELD_PREP(GENMASK(5, 0), gpu_scid));
  915. gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
  916. FIELD_PREP(GENMASK(27, 22), gpu_scid) |
  917. FIELD_PREP(GENMASK(21, 16), gpu_scid) |
  918. FIELD_PREP(GENMASK(15, 10), gpu_scid) |
  919. BIT(8));
  920. }
  921. llcc_slice_activate(a6xx_gpu->htw_llc_slice);
  922. }
  /* Halt request/ack bits used with REG_A6XX_GBIF_HALT and GBIF_HALT_ACK */
  #define GBIF_CLIENT_HALT_MASK		BIT(0)
  #define GBIF_ARB_HALT_MASK		BIT(1)

  /* NOTE(review): the three masks below have no users in the code shown here */
  #define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
  #define VBIF_RESET_ACK_MASK		0xF0
  #define GPR0_GBIF_HALT_REQUEST		0x1E0
  928. void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
  929. {
  930. struct msm_gpu *gpu = &adreno_gpu->base;
  931. if (gx_off) {
  932. /* Halt the gx side of GBIF */
  933. gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
  934. spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
  935. }
  936. /* Halt new client requests on GBIF */
  937. gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
  938. spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
  939. (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
  940. /* Halt all AXI requests on GBIF */
  941. gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
  942. spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
  943. (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
  944. /* The GBIF halt needs to be explicitly cleared */
  945. gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
  946. }
  947. int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
  948. {
  949. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  950. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  951. mutex_lock(&a6xx_gpu->gmu.lock);
  952. /* Force the GPU power on so we can read this register */
  953. a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
  954. *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);
  955. a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
  956. mutex_unlock(&a6xx_gpu->gmu.lock);
  957. return 0;
  958. }
  959. u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
  960. {
  961. struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  962. struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  963. u64 busy_cycles;
  964. /* 19.2MHz */
  965. *out_sample_rate = 19200000;
  966. busy_cycles = gmu_read64(&a6xx_gpu->gmu,
  967. REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
  968. REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
  969. return busy_cycles;
  970. }
  /*
   * Progress check for a ring: unconditionally reports progress.
   *
   * Neither @gpu nor @ring is inspected.
   */
  bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  {
  	return true;
  }