aie2_ctx.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2024, Advanced Micro Devices, Inc.
  4. */
  5. #include <drm/amdxdna_accel.h>
  6. #include <drm/drm_device.h>
  7. #include <drm/drm_gem.h>
  8. #include <drm/drm_gem_shmem_helper.h>
  9. #include <drm/drm_print.h>
  10. #include <drm/drm_syncobj.h>
  11. #include <linux/hmm.h>
  12. #include <linux/types.h>
  13. #include <linux/xarray.h>
  14. #include <trace/events/amdxdna.h>
  15. #include "aie2_msg_priv.h"
  16. #include "aie2_pci.h"
  17. #include "aie2_solver.h"
  18. #include "amdxdna_ctx.h"
  19. #include "amdxdna_gem.h"
  20. #include "amdxdna_mailbox.h"
  21. #include "amdxdna_pci_drv.h"
  22. #include "amdxdna_pm.h"
  23. static bool force_cmdlist = true;
  24. module_param(force_cmdlist, bool, 0600);
  25. MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)");
  26. #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
  27. static void aie2_job_release(struct kref *ref)
  28. {
  29. struct amdxdna_sched_job *job;
  30. job = container_of(ref, struct amdxdna_sched_job, refcnt);
  31. amdxdna_sched_job_cleanup(job);
  32. atomic64_inc(&job->hwctx->job_free_cnt);
  33. wake_up(&job->hwctx->priv->job_free_wq);
  34. if (job->out_fence)
  35. dma_fence_put(job->out_fence);
  36. kfree(job);
  37. }
  38. static void aie2_job_put(struct amdxdna_sched_job *job)
  39. {
  40. kref_put(&job->refcnt, aie2_job_release);
  41. }
  42. /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
  43. static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
  44. struct drm_sched_job *bad_job)
  45. {
  46. drm_sched_stop(&hwctx->priv->sched, bad_job);
  47. aie2_destroy_context(xdna->dev_handle, hwctx);
  48. drm_sched_start(&hwctx->priv->sched, 0);
  49. }
  50. static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
  51. {
  52. struct amdxdna_gem_obj *heap = hwctx->priv->heap;
  53. int ret;
  54. ret = aie2_create_context(xdna->dev_handle, hwctx);
  55. if (ret) {
  56. XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
  57. goto out;
  58. }
  59. ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
  60. heap->mem.userptr, heap->mem.size);
  61. if (ret) {
  62. XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
  63. goto out;
  64. }
  65. ret = aie2_config_cu(hwctx, NULL);
  66. if (ret) {
  67. XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
  68. goto out;
  69. }
  70. out:
  71. XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
  72. return ret;
  73. }
  74. static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
  75. {
  76. struct dma_fence *fence, *out_fence = NULL;
  77. int ret;
  78. fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
  79. if (!fence)
  80. return NULL;
  81. ret = dma_fence_chain_find_seqno(&fence, seq);
  82. if (ret)
  83. goto out;
  84. out_fence = dma_fence_get(dma_fence_chain_contained(fence));
  85. out:
  86. dma_fence_put(fence);
  87. return out_fence;
  88. }
  89. static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
  90. {
  91. struct dma_fence *fence;
  92. fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
  93. if (!fence)
  94. return;
  95. /* Wait up to 2 seconds for fw to finish all pending requests */
  96. dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
  97. dma_fence_put(fence);
  98. }
  99. static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
  100. {
  101. struct amdxdna_dev *xdna = hwctx->client->xdna;
  102. aie2_hwctx_wait_for_idle(hwctx);
  103. aie2_hwctx_stop(xdna, hwctx, NULL);
  104. return 0;
  105. }
  106. void aie2_hwctx_suspend(struct amdxdna_client *client)
  107. {
  108. struct amdxdna_dev *xdna = client->xdna;
  109. /*
  110. * Command timeout is unlikely. But if it happens, it doesn't
  111. * break the system. aie2_hwctx_stop() will destroy mailbox
  112. * and abort all commands.
  113. */
  114. drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
  115. amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
  116. }
  117. static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
  118. {
  119. struct amdxdna_dev *xdna = hwctx->client->xdna;
  120. return aie2_hwctx_restart(xdna, hwctx);
  121. }
  122. int aie2_hwctx_resume(struct amdxdna_client *client)
  123. {
  124. /*
  125. * The resume path cannot guarantee that mailbox channel can be
  126. * regenerated. If this happen, when submit message to this
  127. * mailbox channel, error will return.
  128. */
  129. return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
  130. }
  131. static void
  132. aie2_sched_notify(struct amdxdna_sched_job *job)
  133. {
  134. struct dma_fence *fence = job->fence;
  135. trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
  136. job->hwctx->priv->completed++;
  137. dma_fence_signal(fence);
  138. up(&job->hwctx->priv->job_sem);
  139. job->job_done = true;
  140. mmput_async(job->mm);
  141. aie2_job_put(job);
  142. }
  143. static int
  144. aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
  145. {
  146. struct amdxdna_sched_job *job = handle;
  147. struct amdxdna_gem_obj *cmd_abo;
  148. int ret = 0;
  149. u32 status;
  150. cmd_abo = job->cmd_bo;
  151. if (unlikely(job->job_timeout)) {
  152. amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
  153. ret = -EINVAL;
  154. goto out;
  155. }
  156. if (unlikely(!data) || unlikely(size != sizeof(u32))) {
  157. amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
  158. ret = -EINVAL;
  159. goto out;
  160. }
  161. status = readl(data);
  162. XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
  163. if (status == AIE2_STATUS_SUCCESS)
  164. amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
  165. else
  166. amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);
  167. out:
  168. aie2_sched_notify(job);
  169. return ret;
  170. }
  171. static int
  172. aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
  173. {
  174. struct amdxdna_sched_job *job = handle;
  175. int ret = 0;
  176. if (unlikely(!data))
  177. goto out;
  178. if (unlikely(size != sizeof(u32))) {
  179. ret = -EINVAL;
  180. goto out;
  181. }
  182. job->drv_cmd->result = readl(data);
  183. out:
  184. aie2_sched_notify(job);
  185. return ret;
  186. }
  187. static int
  188. aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
  189. {
  190. struct amdxdna_sched_job *job = handle;
  191. struct amdxdna_gem_obj *cmd_abo;
  192. struct amdxdna_dev *xdna;
  193. u32 fail_cmd_status;
  194. u32 fail_cmd_idx;
  195. u32 cmd_status;
  196. int ret = 0;
  197. cmd_abo = job->cmd_bo;
  198. if (unlikely(job->job_timeout)) {
  199. amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
  200. ret = -EINVAL;
  201. goto out;
  202. }
  203. if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
  204. amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
  205. ret = -EINVAL;
  206. goto out;
  207. }
  208. cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
  209. xdna = job->hwctx->client->xdna;
  210. XDNA_DBG(xdna, "Status 0x%x", cmd_status);
  211. if (cmd_status == AIE2_STATUS_SUCCESS) {
  212. amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
  213. goto out;
  214. }
  215. /* Slow path to handle error, read from ringbuf on BAR */
  216. fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
  217. fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
  218. XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
  219. fail_cmd_idx, fail_cmd_status);
  220. if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
  221. amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
  222. ret = -EINVAL;
  223. } else {
  224. amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
  225. }
  226. out:
  227. aie2_sched_notify(job);
  228. return ret;
  229. }
  230. static struct dma_fence *
  231. aie2_sched_job_run(struct drm_sched_job *sched_job)
  232. {
  233. struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
  234. struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
  235. struct amdxdna_hwctx *hwctx = job->hwctx;
  236. struct dma_fence *fence;
  237. int ret;
  238. if (!hwctx->priv->mbox_chann)
  239. return NULL;
  240. if (!mmget_not_zero(job->mm))
  241. return ERR_PTR(-ESRCH);
  242. kref_get(&job->refcnt);
  243. fence = dma_fence_get(job->fence);
  244. if (job->drv_cmd) {
  245. switch (job->drv_cmd->opcode) {
  246. case SYNC_DEBUG_BO:
  247. ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
  248. break;
  249. case ATTACH_DEBUG_BO:
  250. ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
  251. break;
  252. default:
  253. ret = -EINVAL;
  254. break;
  255. }
  256. goto out;
  257. }
  258. amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
  259. if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
  260. ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
  261. else if (force_cmdlist)
  262. ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
  263. else
  264. ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
  265. out:
  266. if (ret) {
  267. dma_fence_put(job->fence);
  268. aie2_job_put(job);
  269. mmput(job->mm);
  270. fence = ERR_PTR(ret);
  271. }
  272. trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
  273. return fence;
  274. }
  275. static void aie2_sched_job_free(struct drm_sched_job *sched_job)
  276. {
  277. struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
  278. struct amdxdna_hwctx *hwctx = job->hwctx;
  279. trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
  280. if (!job->job_done)
  281. up(&hwctx->priv->job_sem);
  282. drm_sched_job_cleanup(sched_job);
  283. aie2_job_put(job);
  284. }
  285. static enum drm_gpu_sched_stat
  286. aie2_sched_job_timedout(struct drm_sched_job *sched_job)
  287. {
  288. struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
  289. struct amdxdna_hwctx *hwctx = job->hwctx;
  290. struct amdxdna_dev *xdna;
  291. xdna = hwctx->client->xdna;
  292. trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
  293. job->job_timeout = true;
  294. mutex_lock(&xdna->dev_lock);
  295. aie2_hwctx_stop(xdna, hwctx, sched_job);
  296. aie2_hwctx_restart(xdna, hwctx);
  297. mutex_unlock(&xdna->dev_lock);
  298. return DRM_GPU_SCHED_STAT_RESET;
  299. }
  300. static const struct drm_sched_backend_ops sched_ops = {
  301. .run_job = aie2_sched_job_run,
  302. .free_job = aie2_sched_job_free,
  303. .timedout_job = aie2_sched_job_timedout,
  304. };
  305. static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
  306. {
  307. struct amdxdna_dev *xdna = hwctx->client->xdna;
  308. struct amdxdna_dev_hdl *ndev;
  309. int start, end, first, last;
  310. u32 width = 1, entries = 0;
  311. int i;
  312. if (!hwctx->num_tiles) {
  313. XDNA_ERR(xdna, "Number of tiles is zero");
  314. return -EINVAL;
  315. }
  316. ndev = xdna->dev_handle;
  317. if (unlikely(!ndev->metadata.core.row_count)) {
  318. XDNA_WARN(xdna, "Core tile row count is zero");
  319. return -EINVAL;
  320. }
  321. hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
  322. if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
  323. XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
  324. return -EINVAL;
  325. }
  326. if (ndev->priv->col_align == COL_ALIGN_NATURE)
  327. width = hwctx->num_col;
  328. /*
  329. * In range [start, end], find out columns that is multiple of width.
  330. * 'first' is the first column,
  331. * 'last' is the last column,
  332. * 'entries' is the total number of columns.
  333. */
  334. start = xdna->dev_info->first_col;
  335. end = ndev->total_col - hwctx->num_col;
  336. if (start > 0 && end == 0) {
  337. XDNA_DBG(xdna, "Force start from col 0");
  338. start = 0;
  339. }
  340. first = start + (width - start % width) % width;
  341. last = end - end % width;
  342. if (last >= first)
  343. entries = (last - first) / width + 1;
  344. XDNA_DBG(xdna, "start %d end %d first %d last %d",
  345. start, end, first, last);
  346. if (unlikely(!entries)) {
  347. XDNA_ERR(xdna, "Start %d end %d width %d",
  348. start, end, width);
  349. return -EINVAL;
  350. }
  351. hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
  352. if (!hwctx->col_list)
  353. return -ENOMEM;
  354. hwctx->col_list_len = entries;
  355. hwctx->col_list[0] = first;
  356. for (i = 1; i < entries; i++)
  357. hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
  358. print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
  359. entries * sizeof(*hwctx->col_list), false);
  360. return 0;
  361. }
  362. static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
  363. {
  364. struct amdxdna_dev *xdna = hwctx->client->xdna;
  365. struct alloc_requests *xrs_req;
  366. int ret;
  367. if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
  368. hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
  369. hwctx->num_col = xdna->dev_handle->total_col;
  370. return aie2_create_context(xdna->dev_handle, hwctx);
  371. }
  372. xrs_req = kzalloc_obj(*xrs_req);
  373. if (!xrs_req)
  374. return -ENOMEM;
  375. xrs_req->cdo.start_cols = hwctx->col_list;
  376. xrs_req->cdo.cols_len = hwctx->col_list_len;
  377. xrs_req->cdo.ncols = hwctx->num_col;
  378. xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
  379. xrs_req->rqos.gops = hwctx->qos.gops;
  380. xrs_req->rqos.fps = hwctx->qos.fps;
  381. xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
  382. xrs_req->rqos.latency = hwctx->qos.latency;
  383. xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
  384. xrs_req->rqos.priority = hwctx->qos.priority;
  385. xrs_req->rid = (uintptr_t)hwctx;
  386. ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
  387. if (ret)
  388. XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
  389. kfree(xrs_req);
  390. return ret;
  391. }
  392. static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
  393. {
  394. struct amdxdna_dev *xdna = hwctx->client->xdna;
  395. int ret;
  396. if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
  397. ret = aie2_destroy_context(xdna->dev_handle, hwctx);
  398. if (ret && ret != -ENODEV)
  399. XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
  400. } else {
  401. ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
  402. if (ret)
  403. XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
  404. }
  405. }
  406. static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
  407. {
  408. struct amdxdna_dev *xdna = hwctx->client->xdna;
  409. struct drm_file *filp = hwctx->client->filp;
  410. struct drm_syncobj *syncobj;
  411. u32 hdl;
  412. int ret;
  413. hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
  414. ret = drm_syncobj_create(&syncobj, 0, NULL);
  415. if (ret) {
  416. XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
  417. return ret;
  418. }
  419. ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
  420. if (ret) {
  421. drm_syncobj_put(syncobj);
  422. XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
  423. return ret;
  424. }
  425. hwctx->priv->syncobj = syncobj;
  426. hwctx->syncobj_hdl = hdl;
  427. return 0;
  428. }
  429. static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
  430. {
  431. /*
  432. * The syncobj_hdl is owned by user space and will be cleaned up
  433. * separately.
  434. */
  435. drm_syncobj_put(hwctx->priv->syncobj);
  436. }
  437. int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  438. {
  439. struct amdxdna_client *client = hwctx->client;
  440. struct amdxdna_dev *xdna = client->xdna;
  441. const struct drm_sched_init_args args = {
  442. .ops = &sched_ops,
  443. .num_rqs = DRM_SCHED_PRIORITY_COUNT,
  444. .credit_limit = HWCTX_MAX_CMDS,
  445. .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
  446. .name = "amdxdna_js",
  447. .dev = xdna->ddev.dev,
  448. };
  449. struct drm_gpu_scheduler *sched;
  450. struct amdxdna_hwctx_priv *priv;
  451. struct amdxdna_gem_obj *heap;
  452. int i, ret;
  453. priv = kzalloc_obj(*hwctx->priv);
  454. if (!priv)
  455. return -ENOMEM;
  456. hwctx->priv = priv;
  457. mutex_lock(&client->mm_lock);
  458. heap = client->dev_heap;
  459. if (!heap) {
  460. XDNA_ERR(xdna, "The client dev heap object not exist");
  461. mutex_unlock(&client->mm_lock);
  462. ret = -ENOENT;
  463. goto free_priv;
  464. }
  465. drm_gem_object_get(to_gobj(heap));
  466. mutex_unlock(&client->mm_lock);
  467. priv->heap = heap;
  468. sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
  469. ret = amdxdna_gem_pin(heap);
  470. if (ret) {
  471. XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
  472. goto put_heap;
  473. }
  474. for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
  475. struct amdxdna_gem_obj *abo;
  476. struct amdxdna_drm_create_bo args = {
  477. .flags = 0,
  478. .type = AMDXDNA_BO_DEV,
  479. .vaddr = 0,
  480. .size = MAX_CHAIN_CMDBUF_SIZE,
  481. };
  482. abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
  483. if (IS_ERR(abo)) {
  484. ret = PTR_ERR(abo);
  485. goto free_cmd_bufs;
  486. }
  487. XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
  488. i, abo->mem.dev_addr, abo->mem.size);
  489. priv->cmd_buf[i] = abo;
  490. }
  491. sched = &priv->sched;
  492. mutex_init(&priv->io_lock);
  493. fs_reclaim_acquire(GFP_KERNEL);
  494. might_lock(&priv->io_lock);
  495. fs_reclaim_release(GFP_KERNEL);
  496. ret = drm_sched_init(sched, &args);
  497. if (ret) {
  498. XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
  499. goto free_cmd_bufs;
  500. }
  501. ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
  502. &sched, 1, NULL);
  503. if (ret) {
  504. XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
  505. goto free_sched;
  506. }
  507. ret = aie2_hwctx_col_list(hwctx);
  508. if (ret) {
  509. XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
  510. goto free_entity;
  511. }
  512. ret = amdxdna_pm_resume_get_locked(xdna);
  513. if (ret)
  514. goto free_col_list;
  515. ret = aie2_alloc_resource(hwctx);
  516. if (ret) {
  517. XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
  518. goto suspend_put;
  519. }
  520. ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
  521. heap->mem.userptr, heap->mem.size);
  522. if (ret) {
  523. XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
  524. goto release_resource;
  525. }
  526. ret = aie2_ctx_syncobj_create(hwctx);
  527. if (ret) {
  528. XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
  529. goto release_resource;
  530. }
  531. amdxdna_pm_suspend_put(xdna);
  532. init_waitqueue_head(&priv->job_free_wq);
  533. XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
  534. return 0;
  535. release_resource:
  536. aie2_release_resource(hwctx);
  537. suspend_put:
  538. amdxdna_pm_suspend_put(xdna);
  539. free_col_list:
  540. kfree(hwctx->col_list);
  541. free_entity:
  542. drm_sched_entity_destroy(&priv->entity);
  543. free_sched:
  544. drm_sched_fini(&priv->sched);
  545. free_cmd_bufs:
  546. for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
  547. if (!priv->cmd_buf[i])
  548. continue;
  549. drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
  550. }
  551. amdxdna_gem_unpin(heap);
  552. put_heap:
  553. drm_gem_object_put(to_gobj(heap));
  554. free_priv:
  555. kfree(priv);
  556. return ret;
  557. }
  558. void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
  559. {
  560. struct amdxdna_dev *xdna;
  561. int idx;
  562. xdna = hwctx->client->xdna;
  563. XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
  564. aie2_hwctx_wait_for_idle(hwctx);
  565. /* Request fw to destroy hwctx and cancel the rest pending requests */
  566. drm_sched_stop(&hwctx->priv->sched, NULL);
  567. aie2_release_resource(hwctx);
  568. drm_sched_start(&hwctx->priv->sched, 0);
  569. mutex_unlock(&xdna->dev_lock);
  570. drm_sched_entity_destroy(&hwctx->priv->entity);
  571. /* Wait for all submitted jobs to be completed or canceled */
  572. wait_event(hwctx->priv->job_free_wq,
  573. atomic64_read(&hwctx->job_submit_cnt) ==
  574. atomic64_read(&hwctx->job_free_cnt));
  575. mutex_lock(&xdna->dev_lock);
  576. drm_sched_fini(&hwctx->priv->sched);
  577. aie2_ctx_syncobj_destroy(hwctx);
  578. for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
  579. drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
  580. amdxdna_gem_unpin(hwctx->priv->heap);
  581. drm_gem_object_put(to_gobj(hwctx->priv->heap));
  582. mutex_destroy(&hwctx->priv->io_lock);
  583. kfree(hwctx->col_list);
  584. kfree(hwctx->priv);
  585. kfree(hwctx->cus);
  586. }
  587. static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
  588. {
  589. struct amdxdna_hwctx *hwctx = handle;
  590. amdxdna_pm_suspend_put(hwctx->client->xdna);
  591. return 0;
  592. }
  593. static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
  594. {
  595. struct amdxdna_hwctx_param_config_cu *config = buf;
  596. struct amdxdna_dev *xdna = hwctx->client->xdna;
  597. u32 total_size;
  598. int ret;
  599. XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
  600. if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
  601. return -EINVAL;
  602. if (hwctx->cus) {
  603. XDNA_ERR(xdna, "Not support re-config CU");
  604. return -EINVAL;
  605. }
  606. if (!config->num_cus) {
  607. XDNA_ERR(xdna, "Number of CU is zero");
  608. return -EINVAL;
  609. }
  610. total_size = struct_size(config, cu_configs, config->num_cus);
  611. if (total_size > size) {
  612. XDNA_ERR(xdna, "CU config larger than size");
  613. return -EINVAL;
  614. }
  615. hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
  616. if (!hwctx->cus)
  617. return -ENOMEM;
  618. ret = amdxdna_pm_resume_get_locked(xdna);
  619. if (ret)
  620. goto free_cus;
  621. ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
  622. if (ret) {
  623. XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
  624. goto pm_suspend_put;
  625. }
  626. wmb(); /* To avoid locking in command submit when check status */
  627. return 0;
  628. pm_suspend_put:
  629. amdxdna_pm_suspend_put(xdna);
  630. free_cus:
  631. kfree(hwctx->cus);
  632. hwctx->cus = NULL;
  633. return ret;
  634. }
  635. static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
  636. {
  637. struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);
  638. if (!out_fence) {
  639. XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
  640. return;
  641. }
  642. dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
  643. dma_fence_put(out_fence);
  644. }
  645. static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
  646. bool attach)
  647. {
  648. struct amdxdna_client *client = hwctx->client;
  649. struct amdxdna_dev *xdna = client->xdna;
  650. struct amdxdna_drv_cmd cmd = { 0 };
  651. struct amdxdna_gem_obj *abo;
  652. u64 seq;
  653. int ret;
  654. abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
  655. if (!abo) {
  656. XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
  657. return -EINVAL;
  658. }
  659. if (attach) {
  660. if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
  661. ret = -EBUSY;
  662. goto put_obj;
  663. }
  664. cmd.opcode = ATTACH_DEBUG_BO;
  665. } else {
  666. if (abo->assigned_hwctx != hwctx->id) {
  667. ret = -EINVAL;
  668. goto put_obj;
  669. }
  670. cmd.opcode = DETACH_DEBUG_BO;
  671. }
  672. ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
  673. &bo_hdl, 1, hwctx->id, &seq);
  674. if (ret) {
  675. XDNA_ERR(xdna, "Submit command failed");
  676. goto put_obj;
  677. }
  678. aie2_cmd_wait(hwctx, seq);
  679. if (cmd.result) {
  680. XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
  681. goto put_obj;
  682. }
  683. if (attach)
  684. abo->assigned_hwctx = hwctx->id;
  685. else
  686. abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;
  687. XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
  688. put_obj:
  689. amdxdna_gem_put_obj(abo);
  690. return ret;
  691. }
  692. int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
  693. {
  694. struct amdxdna_dev *xdna = hwctx->client->xdna;
  695. drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
  696. switch (type) {
  697. case DRM_AMDXDNA_HWCTX_CONFIG_CU:
  698. return aie2_hwctx_cu_config(hwctx, buf, size);
  699. case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
  700. return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
  701. case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
  702. return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
  703. default:
  704. XDNA_DBG(xdna, "Not supported type %d", type);
  705. return -EOPNOTSUPP;
  706. }
  707. }
  708. int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
  709. {
  710. struct amdxdna_client *client = hwctx->client;
  711. struct amdxdna_dev *xdna = client->xdna;
  712. struct amdxdna_drv_cmd cmd = { 0 };
  713. u64 seq;
  714. int ret;
  715. cmd.opcode = SYNC_DEBUG_BO;
  716. ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
  717. &debug_bo_hdl, 1, hwctx->id, &seq);
  718. if (ret) {
  719. XDNA_ERR(xdna, "Submit command failed");
  720. return ret;
  721. }
  722. aie2_cmd_wait(hwctx, seq);
  723. if (cmd.result) {
  724. XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
  725. return -EINVAL;
  726. }
  727. return 0;
  728. }
  729. static int aie2_populate_range(struct amdxdna_gem_obj *abo)
  730. {
  731. struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
  732. struct amdxdna_umap *mapp;
  733. unsigned long timeout;
  734. struct mm_struct *mm;
  735. bool found;
  736. int ret;
  737. timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
  738. again:
  739. found = false;
  740. down_write(&xdna->notifier_lock);
  741. list_for_each_entry(mapp, &abo->mem.umap_list, node) {
  742. if (mapp->invalid) {
  743. found = true;
  744. break;
  745. }
  746. }
  747. if (!found) {
  748. abo->mem.map_invalid = false;
  749. up_write(&xdna->notifier_lock);
  750. return 0;
  751. }
  752. kref_get(&mapp->refcnt);
  753. up_write(&xdna->notifier_lock);
  754. XDNA_DBG(xdna, "populate memory range %lx %lx",
  755. mapp->vma->vm_start, mapp->vma->vm_end);
  756. mm = mapp->notifier.mm;
  757. if (!mmget_not_zero(mm)) {
  758. amdxdna_umap_put(mapp);
  759. return -EFAULT;
  760. }
  761. mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
  762. mmap_read_lock(mm);
  763. ret = hmm_range_fault(&mapp->range);
  764. mmap_read_unlock(mm);
  765. if (ret) {
  766. if (time_after(jiffies, timeout)) {
  767. ret = -ETIME;
  768. goto put_mm;
  769. }
  770. if (ret == -EBUSY) {
  771. amdxdna_umap_put(mapp);
  772. goto again;
  773. }
  774. goto put_mm;
  775. }
  776. down_write(&xdna->notifier_lock);
  777. if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
  778. up_write(&xdna->notifier_lock);
  779. amdxdna_umap_put(mapp);
  780. goto again;
  781. }
  782. mapp->invalid = false;
  783. up_write(&xdna->notifier_lock);
  784. amdxdna_umap_put(mapp);
  785. goto again;
  786. put_mm:
  787. amdxdna_umap_put(mapp);
  788. mmput(mm);
  789. return ret;
  790. }
  791. int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
  792. {
  793. struct amdxdna_dev *xdna = hwctx->client->xdna;
  794. struct ww_acquire_ctx acquire_ctx;
  795. struct dma_fence_chain *chain;
  796. struct amdxdna_gem_obj *abo;
  797. unsigned long timeout = 0;
  798. int ret, i;
  799. ret = down_interruptible(&hwctx->priv->job_sem);
  800. if (ret) {
  801. XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
  802. return ret;
  803. }
  804. chain = dma_fence_chain_alloc();
  805. if (!chain) {
  806. XDNA_ERR(xdna, "Alloc fence chain failed");
  807. ret = -ENOMEM;
  808. goto up_sem;
  809. }
  810. ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
  811. hwctx->client->filp->client_id);
  812. if (ret) {
  813. XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
  814. goto free_chain;
  815. }
  816. retry:
  817. ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
  818. if (ret) {
  819. XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
  820. goto cleanup_job;
  821. }
  822. for (i = 0; i < job->bo_cnt; i++) {
  823. ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
  824. if (ret) {
  825. XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
  826. drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
  827. goto cleanup_job;
  828. }
  829. }
  830. down_read(&xdna->notifier_lock);
  831. for (i = 0; i < job->bo_cnt; i++) {
  832. abo = to_xdna_obj(job->bos[i]);
  833. if (abo->mem.map_invalid) {
  834. up_read(&xdna->notifier_lock);
  835. drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
  836. if (!timeout) {
  837. timeout = jiffies +
  838. msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
  839. } else if (time_after(jiffies, timeout)) {
  840. ret = -ETIME;
  841. goto cleanup_job;
  842. }
  843. ret = aie2_populate_range(abo);
  844. if (ret)
  845. goto cleanup_job;
  846. goto retry;
  847. }
  848. }
  849. mutex_lock(&hwctx->priv->io_lock);
  850. drm_sched_job_arm(&job->base);
  851. job->out_fence = dma_fence_get(&job->base.s_fence->finished);
  852. for (i = 0; i < job->bo_cnt; i++)
  853. dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
  854. job->seq = hwctx->priv->seq++;
  855. kref_get(&job->refcnt);
  856. drm_sched_entity_push_job(&job->base);
  857. *seq = job->seq;
  858. drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
  859. mutex_unlock(&hwctx->priv->io_lock);
  860. up_read(&xdna->notifier_lock);
  861. drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
  862. aie2_job_put(job);
  863. atomic64_inc(&hwctx->job_submit_cnt);
  864. return 0;
  865. cleanup_job:
  866. drm_sched_job_cleanup(&job->base);
  867. free_chain:
  868. dma_fence_chain_free(chain);
  869. up_sem:
  870. up(&hwctx->priv->job_sem);
  871. job->job_done = true;
  872. return ret;
  873. }
  874. void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
  875. unsigned long cur_seq)
  876. {
  877. struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
  878. struct drm_gem_object *gobj = to_gobj(abo);
  879. long ret;
  880. ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
  881. true, MAX_SCHEDULE_TIMEOUT);
  882. if (!ret)
  883. XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
  884. else if (ret == -ERESTARTSYS)
  885. XDNA_DBG(xdna, "Wait for bo interrupted by signal");
  886. }