panfrost_perfcnt.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Copyright 2019 Collabora Ltd */
  3. #include <linux/completion.h>
  4. #include <linux/iopoll.h>
  5. #include <linux/iosys-map.h>
  6. #include <linux/pm_runtime.h>
  7. #include <linux/slab.h>
  8. #include <linux/uaccess.h>
  9. #include <drm/drm_file.h>
  10. #include <drm/drm_gem_shmem_helper.h>
  11. #include <drm/panfrost_drm.h>
  12. #include "panfrost_device.h"
  13. #include "panfrost_features.h"
  14. #include "panfrost_gem.h"
  15. #include "panfrost_issues.h"
  16. #include "panfrost_job.h"
  17. #include "panfrost_mmu.h"
  18. #include "panfrost_perfcnt.h"
  19. #include "panfrost_regs.h"
  /* Geometry of one counter block in the dump buffer. */
  #define COUNTERS_PER_BLOCK          64
  #define BYTES_PER_COUNTER           4

  /* Number of blocks per core group; used to size the buffer on V4 hardware. */
  #define BLOCKS_PER_COREGROUP        8

  /* NOTE(review): not referenced by the code in this file as seen here. */
  #define V4_SHADERS_PER_COREGROUP    4
  24. struct panfrost_perfcnt {
  25. struct panfrost_gem_mapping *mapping;
  26. size_t bosize;
  27. void *buf;
  28. struct panfrost_file_priv *user;
  29. struct mutex lock;
  30. struct completion dump_comp;
  31. };
  32. void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
  33. {
  34. complete(&pfdev->perfcnt->dump_comp);
  35. }
  36. void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
  37. {
  38. gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
  39. }
  40. static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
  41. {
  42. u64 gpuva;
  43. int ret;
  44. reinit_completion(&pfdev->perfcnt->dump_comp);
  45. gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT;
  46. gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
  47. gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva));
  48. gpu_write(pfdev, GPU_INT_CLEAR,
  49. GPU_IRQ_CLEAN_CACHES_COMPLETED |
  50. GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
  51. gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
  52. ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
  53. msecs_to_jiffies(1000));
  54. if (!ret)
  55. ret = -ETIMEDOUT;
  56. else if (ret > 0)
  57. ret = 0;
  58. return ret;
  59. }
  60. static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
  61. struct drm_file *file_priv,
  62. unsigned int counterset)
  63. {
  64. struct panfrost_file_priv *user = file_priv->driver_priv;
  65. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  66. struct iosys_map map;
  67. struct drm_gem_shmem_object *bo;
  68. u32 cfg, as;
  69. int ret;
  70. if (user == perfcnt->user)
  71. return 0;
  72. else if (perfcnt->user)
  73. return -EBUSY;
  74. ret = pm_runtime_get_sync(pfdev->base.dev);
  75. if (ret < 0)
  76. goto err_put_pm;
  77. bo = drm_gem_shmem_create(&pfdev->base, perfcnt->bosize);
  78. if (IS_ERR(bo)) {
  79. ret = PTR_ERR(bo);
  80. goto err_put_pm;
  81. }
  82. /* Map the perfcnt buf in the address space attached to file_priv. */
  83. ret = panfrost_gem_open(&bo->base, file_priv);
  84. if (ret)
  85. goto err_put_bo;
  86. perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base),
  87. user);
  88. if (!perfcnt->mapping) {
  89. ret = -EINVAL;
  90. goto err_close_bo;
  91. }
  92. ret = drm_gem_vmap(&bo->base, &map);
  93. if (ret)
  94. goto err_put_mapping;
  95. perfcnt->buf = map.vaddr;
  96. panfrost_gem_internal_set_label(&bo->base, "Perfcnt sample buffer");
  97. /*
  98. * Invalidate the cache and clear the counters to start from a fresh
  99. * state.
  100. */
  101. reinit_completion(&pfdev->perfcnt->dump_comp);
  102. gpu_write(pfdev, GPU_INT_CLEAR,
  103. GPU_IRQ_CLEAN_CACHES_COMPLETED |
  104. GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
  105. gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
  106. gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
  107. ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
  108. msecs_to_jiffies(1000));
  109. if (!ret) {
  110. ret = -ETIMEDOUT;
  111. goto err_vunmap;
  112. }
  113. ret = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
  114. if (ret < 0)
  115. goto err_vunmap;
  116. as = ret;
  117. cfg = GPU_PERFCNT_CFG_AS(as) |
  118. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
  119. /*
  120. * Bifrost GPUs have 2 set of counters, but we're only interested by
  121. * the first one for now.
  122. */
  123. if (panfrost_model_is_bifrost(pfdev))
  124. cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
  125. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
  126. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
  127. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
  128. /*
  129. * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
  130. * counters.
  131. */
  132. if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
  133. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  134. else
  135. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
  136. gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
  137. if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
  138. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
  139. /* The BO ref is retained by the mapping. */
  140. drm_gem_object_put(&bo->base);
  141. perfcnt->user = user;
  142. return 0;
  143. err_vunmap:
  144. drm_gem_vunmap(&bo->base, &map);
  145. err_put_mapping:
  146. panfrost_gem_mapping_put(perfcnt->mapping);
  147. err_close_bo:
  148. panfrost_gem_close(&bo->base, file_priv);
  149. err_put_bo:
  150. drm_gem_object_put(&bo->base);
  151. err_put_pm:
  152. pm_runtime_put(pfdev->base.dev);
  153. return ret;
  154. }
  155. static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
  156. struct drm_file *file_priv)
  157. {
  158. struct panfrost_file_priv *user = file_priv->driver_priv;
  159. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  160. struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf);
  161. if (user != perfcnt->user)
  162. return -EINVAL;
  163. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
  164. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
  165. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
  166. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  167. gpu_write(pfdev, GPU_PERFCNT_CFG,
  168. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  169. perfcnt->user = NULL;
  170. drm_gem_vunmap(&perfcnt->mapping->obj->base.base, &map);
  171. perfcnt->buf = NULL;
  172. panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
  173. panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
  174. panfrost_gem_mapping_put(perfcnt->mapping);
  175. perfcnt->mapping = NULL;
  176. pm_runtime_put_autosuspend(pfdev->base.dev);
  177. return 0;
  178. }
  179. int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
  180. struct drm_file *file_priv)
  181. {
  182. struct panfrost_device *pfdev = to_panfrost_device(dev);
  183. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  184. struct drm_panfrost_perfcnt_enable *req = data;
  185. int ret;
  186. ret = panfrost_unstable_ioctl_check();
  187. if (ret)
  188. return ret;
  189. /* Only Bifrost GPUs have 2 set of counters. */
  190. if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
  191. return -EINVAL;
  192. mutex_lock(&perfcnt->lock);
  193. if (req->enable)
  194. ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
  195. req->counterset);
  196. else
  197. ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
  198. mutex_unlock(&perfcnt->lock);
  199. return ret;
  200. }
  201. int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
  202. struct drm_file *file_priv)
  203. {
  204. struct panfrost_device *pfdev = to_panfrost_device(dev);
  205. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  206. struct drm_panfrost_perfcnt_dump *req = data;
  207. void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
  208. int ret;
  209. ret = panfrost_unstable_ioctl_check();
  210. if (ret)
  211. return ret;
  212. mutex_lock(&perfcnt->lock);
  213. if (perfcnt->user != file_priv->driver_priv) {
  214. ret = -EINVAL;
  215. goto out;
  216. }
  217. ret = panfrost_perfcnt_dump_locked(pfdev);
  218. if (ret)
  219. goto out;
  220. if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
  221. ret = -EFAULT;
  222. out:
  223. mutex_unlock(&perfcnt->lock);
  224. return ret;
  225. }
  226. void panfrost_perfcnt_close(struct drm_file *file_priv)
  227. {
  228. struct panfrost_file_priv *pfile = file_priv->driver_priv;
  229. struct panfrost_device *pfdev = pfile->pfdev;
  230. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  231. pm_runtime_get_sync(pfdev->base.dev);
  232. mutex_lock(&perfcnt->lock);
  233. if (perfcnt->user == pfile)
  234. panfrost_perfcnt_disable_locked(pfdev, file_priv);
  235. mutex_unlock(&perfcnt->lock);
  236. pm_runtime_put_autosuspend(pfdev->base.dev);
  237. }
  238. int panfrost_perfcnt_init(struct panfrost_device *pfdev)
  239. {
  240. struct panfrost_perfcnt *perfcnt;
  241. size_t size;
  242. if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
  243. unsigned int ncoregroups;
  244. ncoregroups = hweight64(pfdev->features.l2_present);
  245. size = ncoregroups * BLOCKS_PER_COREGROUP *
  246. COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
  247. } else {
  248. unsigned int nl2c, ncores;
  249. /*
  250. * TODO: define a macro to extract the number of l2 caches from
  251. * mem_features.
  252. */
  253. nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
  254. /*
  255. * shader_present might be sparse, but the counters layout
  256. * forces to dump unused regions too, hence the fls64() call
  257. * instead of hweight64().
  258. */
  259. ncores = fls64(pfdev->features.shader_present);
  260. /*
  261. * There's always one JM and one Tiler block, hence the '+ 2'
  262. * here.
  263. */
  264. size = (nl2c + ncores + 2) *
  265. COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
  266. }
  267. perfcnt = devm_kzalloc(pfdev->base.dev, sizeof(*perfcnt), GFP_KERNEL);
  268. if (!perfcnt)
  269. return -ENOMEM;
  270. perfcnt->bosize = size;
  271. /* Start with everything disabled. */
  272. gpu_write(pfdev, GPU_PERFCNT_CFG,
  273. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  274. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
  275. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
  276. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
  277. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  278. init_completion(&perfcnt->dump_comp);
  279. mutex_init(&perfcnt->lock);
  280. pfdev->perfcnt = perfcnt;
  281. return 0;
  282. }
  283. void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
  284. {
  285. /* Disable everything before leaving. */
  286. gpu_write(pfdev, GPU_PERFCNT_CFG,
  287. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  288. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
  289. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
  290. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
  291. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  292. }