qaic_data.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
  3. /* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
  4. #include <linux/bitfield.h>
  5. #include <linux/bits.h>
  6. #include <linux/completion.h>
  7. #include <linux/delay.h>
  8. #include <linux/dma-buf.h>
  9. #include <linux/dma-mapping.h>
  10. #include <linux/interrupt.h>
  11. #include <linux/kref.h>
  12. #include <linux/list.h>
  13. #include <linux/math64.h>
  14. #include <linux/mm.h>
  15. #include <linux/moduleparam.h>
  16. #include <linux/scatterlist.h>
  17. #include <linux/spinlock.h>
  18. #include <linux/srcu.h>
  19. #include <linux/string.h>
  20. #include <linux/types.h>
  21. #include <linux/uaccess.h>
  22. #include <linux/wait.h>
  23. #include <drm/drm_file.h>
  24. #include <drm/drm_gem.h>
  25. #include <drm/drm_prime.h>
  26. #include <drm/drm_print.h>
  27. #include <uapi/drm/qaic_accel.h>
  28. #include "qaic.h"
  29. #define SEM_VAL_MASK GENMASK_ULL(11, 0)
  30. #define SEM_INDEX_MASK GENMASK_ULL(4, 0)
  31. #define BULK_XFER BIT(3)
  32. #define GEN_COMPLETION BIT(4)
  33. #define INBOUND_XFER 1
  34. #define OUTBOUND_XFER 2
  35. #define REQHP_OFF 0x0 /* we read this */
  36. #define REQTP_OFF 0x4 /* we write this */
  37. #define RSPHP_OFF 0x8 /* we write this */
  38. #define RSPTP_OFF 0xc /* we read this */
  39. #define ENCODE_SEM(val, index, sync, cmd, flags) \
  40. ({ \
  41. FIELD_PREP(GENMASK(11, 0), (val)) | \
  42. FIELD_PREP(GENMASK(20, 16), (index)) | \
  43. FIELD_PREP(BIT(22), (sync)) | \
  44. FIELD_PREP(GENMASK(26, 24), (cmd)) | \
  45. FIELD_PREP(GENMASK(30, 29), (flags)) | \
  46. FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \
  47. })
  48. #define NUM_EVENTS 128
  49. #define NUM_DELAYS 10
  50. #define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
  51. static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
  52. module_param(wait_exec_default_timeout_ms, uint, 0600);
  53. MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO");
  54. static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */
  55. module_param(datapath_poll_interval_us, uint, 0600);
  56. MODULE_PARM_DESC(datapath_poll_interval_us,
  57. "Amount of time to sleep between activity when datapath polling is enabled");
  58. struct dbc_req {
  59. /*
  60. * A request ID is assigned to each memory handle going in DMA queue.
  61. * As a single memory handle can enqueue multiple elements in DMA queue
  62. * all of them will have the same request ID.
  63. */
  64. __le16 req_id;
  65. /* Future use */
  66. __u8 seq_id;
  67. /*
  68. * Special encoded variable
  69. * 7 0 - Do not force to generate MSI after DMA is completed
  70. * 1 - Force to generate MSI after DMA is completed
  71. * 6:5 Reserved
  72. * 4 1 - Generate completion element in the response queue
  73. * 0 - No Completion Code
  74. * 3 0 - DMA request is a Link list transfer
  75. * 1 - DMA request is a Bulk transfer
  76. * 2 Reserved
  77. * 1:0 00 - No DMA transfer involved
  78. * 01 - DMA transfer is part of inbound transfer
  79. * 10 - DMA transfer has outbound transfer
  80. * 11 - NA
  81. */
  82. __u8 cmd;
  83. __le32 resv;
  84. /* Source address for the transfer */
  85. __le64 src_addr;
  86. /* Destination address for the transfer */
  87. __le64 dest_addr;
  88. /* Length of transfer request */
  89. __le32 len;
  90. __le32 resv2;
  91. /* Doorbell address */
  92. __le64 db_addr;
  93. /*
  94. * Special encoded variable
  95. * 7 1 - Doorbell(db) write
  96. * 0 - No doorbell write
  97. * 6:2 Reserved
  98. * 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary
  99. * 01 - 16 bit access, db address must be aligned to 16bit-boundary
  100. * 10 - 8 bit access, db address must be aligned to 8bit-boundary
  101. * 11 - Reserved
  102. */
  103. __u8 db_len;
  104. __u8 resv3;
  105. __le16 resv4;
  106. /* 32 bit data written to doorbell address */
  107. __le32 db_data;
  108. /*
  109. * Special encoded variable
  110. * All the fields of sem_cmdX are passed from user and all are ORed
  111. * together to form sem_cmd.
  112. * 0:11 Semaphore value
  113. * 15:12 Reserved
  114. * 20:16 Semaphore index
  115. * 21 Reserved
  116. * 22 Semaphore Sync
  117. * 23 Reserved
  118. * 26:24 Semaphore command
  119. * 28:27 Reserved
  120. * 29 Semaphore DMA out bound sync fence
  121. * 30 Semaphore DMA in bound sync fence
  122. * 31 Enable semaphore command
  123. */
  124. __le32 sem_cmd0;
  125. __le32 sem_cmd1;
  126. __le32 sem_cmd2;
  127. __le32 sem_cmd3;
  128. } __packed;
  129. struct dbc_rsp {
  130. /* Request ID of the memory handle whose DMA transaction is completed */
  131. __le16 req_id;
  132. /* Status of the DMA transaction. 0 : Success otherwise failure */
  133. __le16 status;
  134. } __packed;
  135. static inline bool bo_queued(struct qaic_bo *bo)
  136. {
  137. return !list_empty(&bo->xfer_list);
  138. }
  139. inline int get_dbc_req_elem_size(void)
  140. {
  141. return sizeof(struct dbc_req);
  142. }
  143. inline int get_dbc_rsp_elem_size(void)
  144. {
  145. return sizeof(struct dbc_rsp);
  146. }
  147. static void free_slice(struct kref *kref)
  148. {
  149. struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);
  150. slice->bo->total_slice_nents -= slice->nents;
  151. list_del(&slice->slice);
  152. drm_gem_object_put(&slice->bo->base);
  153. sg_free_table(slice->sgt);
  154. kfree(slice->sgt);
  155. kvfree(slice->reqs);
  156. kfree(slice);
  157. }
  158. static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
  159. struct sg_table *sgt_in, u64 size, u64 offset)
  160. {
  161. struct scatterlist *sg, *sgn, *sgf, *sgl;
  162. unsigned int len, nents, offf, offl;
  163. struct sg_table *sgt;
  164. size_t total_len;
  165. int ret, j;
  166. /* find out number of relevant nents needed for this mem */
  167. total_len = 0;
  168. sgf = NULL;
  169. sgl = NULL;
  170. nents = 0;
  171. offf = 0;
  172. offl = 0;
  173. size = size ? size : PAGE_SIZE;
  174. for_each_sgtable_dma_sg(sgt_in, sg, j) {
  175. len = sg_dma_len(sg);
  176. if (!len)
  177. continue;
  178. if (offset >= total_len && offset < total_len + len) {
  179. sgf = sg;
  180. offf = offset - total_len;
  181. }
  182. if (sgf)
  183. nents++;
  184. if (offset + size >= total_len &&
  185. offset + size <= total_len + len) {
  186. sgl = sg;
  187. offl = offset + size - total_len;
  188. break;
  189. }
  190. total_len += len;
  191. }
  192. if (!sgf || !sgl) {
  193. ret = -EINVAL;
  194. goto out;
  195. }
  196. sgt = kzalloc_obj(*sgt);
  197. if (!sgt) {
  198. ret = -ENOMEM;
  199. goto out;
  200. }
  201. ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
  202. if (ret)
  203. goto free_sgt;
  204. /* copy relevant sg node and fix page and length */
  205. sgn = sgf;
  206. for_each_sgtable_dma_sg(sgt, sg, j) {
  207. memcpy(sg, sgn, sizeof(*sg));
  208. if (sgn == sgf) {
  209. sg_dma_address(sg) += offf;
  210. sg_dma_len(sg) -= offf;
  211. sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf);
  212. } else {
  213. offf = 0;
  214. }
  215. if (sgn == sgl) {
  216. sg_dma_len(sg) = offl - offf;
  217. sg_set_page(sg, sg_page(sgn), offl - offf, offf);
  218. sg_mark_end(sg);
  219. break;
  220. }
  221. sgn = sg_next(sgn);
  222. }
  223. *sgt_out = sgt;
  224. return ret;
  225. free_sgt:
  226. kfree(sgt);
  227. out:
  228. *sgt_out = NULL;
  229. return ret;
  230. }
  231. static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice,
  232. struct qaic_attach_slice_entry *req)
  233. {
  234. __le64 db_addr = cpu_to_le64(req->db_addr);
  235. __le32 db_data = cpu_to_le32(req->db_data);
  236. struct scatterlist *sg;
  237. __u8 cmd = BULK_XFER;
  238. int presync_sem;
  239. u64 dev_addr;
  240. __u8 db_len;
  241. int i;
  242. if (!slice->no_xfer)
  243. cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER);
  244. if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8))
  245. return -EINVAL;
  246. presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync;
  247. if (presync_sem > 1)
  248. return -EINVAL;
  249. presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 |
  250. req->sem2.presync << 2 | req->sem3.presync << 3;
  251. switch (req->db_len) {
  252. case 32:
  253. db_len = BIT(7);
  254. break;
  255. case 16:
  256. db_len = BIT(7) | 1;
  257. break;
  258. case 8:
  259. db_len = BIT(7) | 2;
  260. break;
  261. case 0:
  262. db_len = 0; /* doorbell is not active for this command */
  263. break;
  264. default:
  265. return -EINVAL; /* should never hit this */
  266. }
  267. /*
  268. * When we end up splitting up a single request (ie a buf slice) into
  269. * multiple DMA requests, we have to manage the sync data carefully.
  270. * There can only be one presync sem. That needs to be on every xfer
  271. * so that the DMA engine doesn't transfer data before the receiver is
  272. * ready. We only do the doorbell and postsync sems after the xfer.
  273. * To guarantee previous xfers for the request are complete, we use a
  274. * fence.
  275. */
  276. dev_addr = req->dev_addr;
  277. for_each_sgtable_dma_sg(slice->sgt, sg, i) {
  278. slice->reqs[i].cmd = cmd;
  279. slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
  280. sg_dma_address(sg) : dev_addr);
  281. slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
  282. dev_addr : sg_dma_address(sg));
  283. /*
  284. * sg_dma_len(sg) returns size of a DMA segment, maximum DMA
  285. * segment size is set to UINT_MAX by qaic and hence return
  286. * values of sg_dma_len(sg) can never exceed u32 range. So,
  287. * by down sizing we are not corrupting the value.
  288. */
  289. slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg));
  290. switch (presync_sem) {
  291. case BIT(0):
  292. slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val,
  293. req->sem0.index,
  294. req->sem0.presync,
  295. req->sem0.cmd,
  296. req->sem0.flags));
  297. break;
  298. case BIT(1):
  299. slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val,
  300. req->sem1.index,
  301. req->sem1.presync,
  302. req->sem1.cmd,
  303. req->sem1.flags));
  304. break;
  305. case BIT(2):
  306. slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val,
  307. req->sem2.index,
  308. req->sem2.presync,
  309. req->sem2.cmd,
  310. req->sem2.flags));
  311. break;
  312. case BIT(3):
  313. slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val,
  314. req->sem3.index,
  315. req->sem3.presync,
  316. req->sem3.cmd,
  317. req->sem3.flags));
  318. break;
  319. }
  320. dev_addr += sg_dma_len(sg);
  321. }
  322. /* add post transfer stuff to last segment */
  323. i--;
  324. slice->reqs[i].cmd |= GEN_COMPLETION;
  325. slice->reqs[i].db_addr = db_addr;
  326. slice->reqs[i].db_len = db_len;
  327. slice->reqs[i].db_data = db_data;
  328. /*
  329. * Add a fence if we have more than one request going to the hardware
  330. * representing the entirety of the user request, and the user request
  331. * has no presync condition.
  332. * Fences are expensive, so we try to avoid them. We rely on the
  333. * hardware behavior to avoid needing one when there is a presync
  334. * condition. When a presync exists, all requests for that same
  335. * presync will be queued into a fifo. Thus, since we queue the
  336. * post xfer activity only on the last request we queue, the hardware
  337. * will ensure that the last queued request is processed last, thus
  338. * making sure the post xfer activity happens at the right time without
  339. * a fence.
  340. */
  341. if (i && !presync_sem)
  342. req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ?
  343. QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE);
  344. slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index,
  345. req->sem0.presync, req->sem0.cmd,
  346. req->sem0.flags));
  347. slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index,
  348. req->sem1.presync, req->sem1.cmd,
  349. req->sem1.flags));
  350. slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index,
  351. req->sem2.presync, req->sem2.cmd,
  352. req->sem2.flags));
  353. slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index,
  354. req->sem3.presync, req->sem3.cmd,
  355. req->sem3.flags));
  356. return 0;
  357. }
  358. static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
  359. struct qaic_attach_slice_entry *slice_ent)
  360. {
  361. struct sg_table *sgt = NULL;
  362. struct bo_slice *slice;
  363. int ret;
  364. ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset);
  365. if (ret)
  366. goto out;
  367. slice = kmalloc_obj(*slice);
  368. if (!slice) {
  369. ret = -ENOMEM;
  370. goto free_sgt;
  371. }
  372. slice->reqs = kvzalloc_objs(*slice->reqs, sgt->nents);
  373. if (!slice->reqs) {
  374. ret = -ENOMEM;
  375. goto free_slice;
  376. }
  377. slice->no_xfer = !slice_ent->size;
  378. slice->sgt = sgt;
  379. slice->nents = sgt->nents;
  380. slice->dir = bo->dir;
  381. slice->bo = bo;
  382. slice->size = slice_ent->size;
  383. slice->offset = slice_ent->offset;
  384. ret = encode_reqs(qdev, slice, slice_ent);
  385. if (ret)
  386. goto free_req;
  387. bo->total_slice_nents += sgt->nents;
  388. kref_init(&slice->ref_count);
  389. drm_gem_object_get(&bo->base);
  390. list_add_tail(&slice->slice, &bo->slices);
  391. return 0;
  392. free_req:
  393. kvfree(slice->reqs);
  394. free_slice:
  395. kfree(slice);
  396. free_sgt:
  397. sg_free_table(sgt);
  398. kfree(sgt);
  399. out:
  400. return ret;
  401. }
  402. static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size)
  403. {
  404. struct scatterlist *sg;
  405. struct sg_table *sgt;
  406. struct page **pages;
  407. int *pages_order;
  408. int buf_extra;
  409. int max_order;
  410. int nr_pages;
  411. int ret = 0;
  412. int i, j, k;
  413. int order;
  414. if (size) {
  415. nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
  416. /*
  417. * calculate how much extra we are going to allocate, to remove
  418. * later
  419. */
  420. buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
  421. max_order = min(MAX_PAGE_ORDER, get_order(size));
  422. } else {
  423. /* allocate a single page for book keeping */
  424. nr_pages = 1;
  425. buf_extra = 0;
  426. max_order = 0;
  427. }
  428. pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL);
  429. if (!pages) {
  430. ret = -ENOMEM;
  431. goto out;
  432. }
  433. pages_order = (void *)pages + sizeof(*pages) * nr_pages;
  434. /*
  435. * Allocate requested memory using alloc_pages. It is possible to allocate
  436. * the requested memory in multiple chunks by calling alloc_pages
  437. * multiple times. Use SG table to handle multiple allocated pages.
  438. */
  439. i = 0;
  440. while (nr_pages > 0) {
  441. order = min(get_order(nr_pages * PAGE_SIZE), max_order);
  442. while (1) {
  443. pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER |
  444. __GFP_NOWARN | __GFP_ZERO |
  445. (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL),
  446. order);
  447. if (pages[i])
  448. break;
  449. if (!order--) {
  450. ret = -ENOMEM;
  451. goto free_partial_alloc;
  452. }
  453. }
  454. max_order = order;
  455. pages_order[i] = order;
  456. nr_pages -= 1 << order;
  457. if (nr_pages <= 0)
  458. /* account for over allocation */
  459. buf_extra += abs(nr_pages) * PAGE_SIZE;
  460. i++;
  461. }
  462. sgt = kmalloc_obj(*sgt);
  463. if (!sgt) {
  464. ret = -ENOMEM;
  465. goto free_partial_alloc;
  466. }
  467. if (sg_alloc_table(sgt, i, GFP_KERNEL)) {
  468. ret = -ENOMEM;
  469. goto free_sgt;
  470. }
  471. /* Populate the SG table with the allocated memory pages */
  472. sg = sgt->sgl;
  473. for (k = 0; k < i; k++, sg = sg_next(sg)) {
  474. /* Last entry requires special handling */
  475. if (k < i - 1) {
  476. sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0);
  477. } else {
  478. sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0);
  479. sg_mark_end(sg);
  480. }
  481. }
  482. kvfree(pages);
  483. *sgt_out = sgt;
  484. return ret;
  485. free_sgt:
  486. kfree(sgt);
  487. free_partial_alloc:
  488. for (j = 0; j < i; j++)
  489. __free_pages(pages[j], pages_order[j]);
  490. kvfree(pages);
  491. out:
  492. *sgt_out = NULL;
  493. return ret;
  494. }
  495. static bool invalid_sem(struct qaic_sem *sem)
  496. {
  497. if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK ||
  498. !(sem->presync == 0 || sem->presync == 1) || sem->pad ||
  499. sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) ||
  500. sem->cmd > QAIC_SEM_WAIT_GT_0)
  501. return true;
  502. return false;
  503. }
  504. static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
  505. u32 count, u64 total_size)
  506. {
  507. u64 total;
  508. int i;
  509. for (i = 0; i < count; i++) {
  510. if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 ||
  511. slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) ||
  512. invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) ||
  513. invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
  514. return -EINVAL;
  515. if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) ||
  516. total > total_size)
  517. return -EINVAL;
  518. }
  519. return 0;
  520. }
  521. static void qaic_free_sgt(struct sg_table *sgt)
  522. {
  523. struct scatterlist *sg;
  524. if (!sgt)
  525. return;
  526. for (sg = sgt->sgl; sg; sg = sg_next(sg))
  527. if (sg_page(sg))
  528. __free_pages(sg_page(sg), get_order(sg->length));
  529. sg_free_table(sgt);
  530. kfree(sgt);
  531. }
  532. static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
  533. const struct drm_gem_object *obj)
  534. {
  535. struct qaic_bo *bo = to_qaic_bo(obj);
  536. drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir);
  537. }
  538. static const struct vm_operations_struct drm_vm_ops = {
  539. .open = drm_gem_vm_open,
  540. .close = drm_gem_vm_close,
  541. };
  542. static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
  543. {
  544. struct qaic_bo *bo = to_qaic_bo(obj);
  545. unsigned long offset = 0;
  546. struct scatterlist *sg;
  547. int ret = 0;
  548. if (drm_gem_is_imported(obj))
  549. return -EINVAL;
  550. for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
  551. if (sg_page(sg)) {
  552. ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)),
  553. sg->length, vma->vm_page_prot);
  554. if (ret)
  555. goto out;
  556. offset += sg->length;
  557. }
  558. }
  559. out:
  560. return ret;
  561. }
  562. static void qaic_free_object(struct drm_gem_object *obj)
  563. {
  564. struct qaic_bo *bo = to_qaic_bo(obj);
  565. if (drm_gem_is_imported(obj)) {
  566. /* DMABUF/PRIME Path */
  567. drm_prime_gem_destroy(obj, NULL);
  568. } else {
  569. /* Private buffer allocation path */
  570. qaic_free_sgt(bo->sgt);
  571. }
  572. mutex_destroy(&bo->lock);
  573. drm_gem_object_release(obj);
  574. kfree(bo);
  575. }
  576. static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
  577. {
  578. struct qaic_bo *bo = to_qaic_bo(obj);
  579. struct scatterlist *sg, *sg_in;
  580. struct sg_table *sgt, *sgt_in;
  581. int i;
  582. sgt_in = bo->sgt;
  583. sgt = kmalloc_obj(*sgt);
  584. if (!sgt)
  585. return ERR_PTR(-ENOMEM);
  586. if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
  587. kfree(sgt);
  588. return ERR_PTR(-ENOMEM);
  589. }
  590. sg = sgt->sgl;
  591. for_each_sgtable_sg(sgt_in, sg_in, i) {
  592. memcpy(sg, sg_in, sizeof(*sg));
  593. sg = sg_next(sg);
  594. }
  595. return sgt;
  596. }
  597. static const struct drm_gem_object_funcs qaic_gem_funcs = {
  598. .free = qaic_free_object,
  599. .get_sg_table = qaic_get_sg_table,
  600. .print_info = qaic_gem_print_info,
  601. .mmap = qaic_gem_object_mmap,
  602. .vm_ops = &drm_vm_ops,
  603. };
  604. static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
  605. {
  606. if (reinit) {
  607. bo->sliced = false;
  608. reinit_completion(&bo->xfer_done);
  609. } else {
  610. mutex_init(&bo->lock);
  611. init_completion(&bo->xfer_done);
  612. }
  613. complete_all(&bo->xfer_done);
  614. INIT_LIST_HEAD(&bo->slices);
  615. INIT_LIST_HEAD(&bo->xfer_list);
  616. }
  617. static struct qaic_bo *qaic_alloc_init_bo(void)
  618. {
  619. struct qaic_bo *bo;
  620. bo = kzalloc_obj(*bo);
  621. if (!bo)
  622. return ERR_PTR(-ENOMEM);
  623. qaic_init_bo(bo, false);
  624. return bo;
  625. }
  626. int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  627. {
  628. struct qaic_create_bo *args = data;
  629. int usr_rcu_id, qdev_rcu_id;
  630. struct drm_gem_object *obj;
  631. struct qaic_device *qdev;
  632. struct qaic_user *usr;
  633. struct qaic_bo *bo;
  634. size_t size;
  635. int ret;
  636. if (args->pad)
  637. return -EINVAL;
  638. size = PAGE_ALIGN(args->size);
  639. if (size == 0)
  640. return -EINVAL;
  641. usr = file_priv->driver_priv;
  642. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  643. if (!usr->qddev) {
  644. ret = -ENODEV;
  645. goto unlock_usr_srcu;
  646. }
  647. qdev = usr->qddev->qdev;
  648. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  649. if (qdev->dev_state != QAIC_ONLINE) {
  650. ret = -ENODEV;
  651. goto unlock_dev_srcu;
  652. }
  653. bo = qaic_alloc_init_bo();
  654. if (IS_ERR(bo)) {
  655. ret = PTR_ERR(bo);
  656. goto unlock_dev_srcu;
  657. }
  658. obj = &bo->base;
  659. drm_gem_private_object_init(dev, obj, size);
  660. obj->funcs = &qaic_gem_funcs;
  661. ret = create_sgt(qdev, &bo->sgt, size);
  662. if (ret)
  663. goto free_bo;
  664. ret = drm_gem_create_mmap_offset(obj);
  665. if (ret)
  666. goto free_bo;
  667. ret = drm_gem_handle_create(file_priv, obj, &args->handle);
  668. if (ret)
  669. goto free_bo;
  670. drm_gem_object_put(obj);
  671. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  672. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  673. return 0;
  674. free_bo:
  675. drm_gem_object_put(obj);
  676. unlock_dev_srcu:
  677. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  678. unlock_usr_srcu:
  679. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  680. return ret;
  681. }
  682. int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  683. {
  684. struct qaic_mmap_bo *args = data;
  685. int usr_rcu_id, qdev_rcu_id;
  686. struct drm_gem_object *obj;
  687. struct qaic_device *qdev;
  688. struct qaic_user *usr;
  689. int ret = 0;
  690. usr = file_priv->driver_priv;
  691. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  692. if (!usr->qddev) {
  693. ret = -ENODEV;
  694. goto unlock_usr_srcu;
  695. }
  696. qdev = usr->qddev->qdev;
  697. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  698. if (qdev->dev_state != QAIC_ONLINE) {
  699. ret = -ENODEV;
  700. goto unlock_dev_srcu;
  701. }
  702. obj = drm_gem_object_lookup(file_priv, args->handle);
  703. if (!obj) {
  704. ret = -ENOENT;
  705. goto unlock_dev_srcu;
  706. }
  707. args->offset = drm_vma_node_offset_addr(&obj->vma_node);
  708. drm_gem_object_put(obj);
  709. unlock_dev_srcu:
  710. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  711. unlock_usr_srcu:
  712. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  713. return ret;
  714. }
  715. struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
  716. {
  717. struct dma_buf_attachment *attach;
  718. struct drm_gem_object *obj;
  719. struct qaic_bo *bo;
  720. int ret;
  721. bo = qaic_alloc_init_bo();
  722. if (IS_ERR(bo)) {
  723. ret = PTR_ERR(bo);
  724. goto out;
  725. }
  726. obj = &bo->base;
  727. get_dma_buf(dma_buf);
  728. attach = dma_buf_attach(dma_buf, dev->dev);
  729. if (IS_ERR(attach)) {
  730. ret = PTR_ERR(attach);
  731. goto attach_fail;
  732. }
  733. if (!attach->dmabuf->size) {
  734. ret = -EINVAL;
  735. goto size_align_fail;
  736. }
  737. drm_gem_private_object_init(dev, obj, attach->dmabuf->size);
  738. /*
  739. * skipping dma_buf_map_attachment() as we do not know the direction
  740. * just yet. Once the direction is known in the subsequent IOCTL to
  741. * attach slicing, we can do it then.
  742. */
  743. obj->funcs = &qaic_gem_funcs;
  744. obj->import_attach = attach;
  745. obj->resv = dma_buf->resv;
  746. return obj;
  747. size_align_fail:
  748. dma_buf_detach(dma_buf, attach);
  749. attach_fail:
  750. dma_buf_put(dma_buf);
  751. kfree(bo);
  752. out:
  753. return ERR_PTR(ret);
  754. }
  755. static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr)
  756. {
  757. struct drm_gem_object *obj = &bo->base;
  758. struct sg_table *sgt;
  759. int ret;
  760. sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
  761. if (IS_ERR(sgt)) {
  762. ret = PTR_ERR(sgt);
  763. return ret;
  764. }
  765. bo->sgt = sgt;
  766. return 0;
  767. }
  768. static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  769. struct qaic_attach_slice_hdr *hdr)
  770. {
  771. int ret;
  772. ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
  773. if (ret)
  774. return -EFAULT;
  775. return 0;
  776. }
  777. static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  778. struct qaic_attach_slice_hdr *hdr)
  779. {
  780. int ret;
  781. if (drm_gem_is_imported(&bo->base))
  782. ret = qaic_prepare_import_bo(bo, hdr);
  783. else
  784. ret = qaic_prepare_export_bo(qdev, bo, hdr);
  785. bo->dir = hdr->dir;
  786. bo->dbc = &qdev->dbc[hdr->dbc_id];
  787. bo->nr_slice = hdr->count;
  788. return ret;
  789. }
  790. static void qaic_unprepare_import_bo(struct qaic_bo *bo)
  791. {
  792. dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
  793. bo->sgt = NULL;
  794. }
  795. static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
  796. {
  797. dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0);
  798. }
  799. static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
  800. {
  801. if (drm_gem_is_imported(&bo->base))
  802. qaic_unprepare_import_bo(bo);
  803. else
  804. qaic_unprepare_export_bo(qdev, bo);
  805. bo->dir = 0;
  806. bo->dbc = NULL;
  807. bo->nr_slice = 0;
  808. }
  809. static void qaic_free_slices_bo(struct qaic_bo *bo)
  810. {
  811. struct bo_slice *slice, *temp;
  812. list_for_each_entry_safe(slice, temp, &bo->slices, slice)
  813. kref_put(&slice->ref_count, free_slice);
  814. if (WARN_ON_ONCE(bo->total_slice_nents != 0))
  815. bo->total_slice_nents = 0;
  816. bo->nr_slice = 0;
  817. }
  818. static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
  819. struct qaic_attach_slice_hdr *hdr,
  820. struct qaic_attach_slice_entry *slice_ent)
  821. {
  822. int ret, i;
  823. for (i = 0; i < hdr->count; i++) {
  824. ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]);
  825. if (ret) {
  826. qaic_free_slices_bo(bo);
  827. return ret;
  828. }
  829. }
  830. if (bo->total_slice_nents > bo->dbc->nelem) {
  831. qaic_free_slices_bo(bo);
  832. return -ENOSPC;
  833. }
  834. return 0;
  835. }
  836. int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  837. {
  838. struct qaic_attach_slice_entry *slice_ent;
  839. struct qaic_attach_slice *args = data;
  840. int rcu_id, usr_rcu_id, qdev_rcu_id;
  841. struct dma_bridge_chan *dbc;
  842. struct drm_gem_object *obj;
  843. struct qaic_device *qdev;
  844. unsigned long arg_size;
  845. struct qaic_user *usr;
  846. u8 __user *user_data;
  847. struct qaic_bo *bo;
  848. int ret;
  849. if (args->hdr.count == 0)
  850. return -EINVAL;
  851. if (check_mul_overflow((unsigned long)args->hdr.count,
  852. (unsigned long)sizeof(*slice_ent),
  853. &arg_size))
  854. return -EINVAL;
  855. if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
  856. return -EINVAL;
  857. if (args->data == 0)
  858. return -EINVAL;
  859. usr = file_priv->driver_priv;
  860. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  861. if (!usr->qddev) {
  862. ret = -ENODEV;
  863. goto unlock_usr_srcu;
  864. }
  865. qdev = usr->qddev->qdev;
  866. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  867. if (qdev->dev_state != QAIC_ONLINE) {
  868. ret = -ENODEV;
  869. goto unlock_dev_srcu;
  870. }
  871. if (args->hdr.dbc_id >= qdev->num_dbc) {
  872. ret = -EINVAL;
  873. goto unlock_dev_srcu;
  874. }
  875. user_data = u64_to_user_ptr(args->data);
  876. slice_ent = memdup_user(user_data, arg_size);
  877. if (IS_ERR(slice_ent)) {
  878. ret = PTR_ERR(slice_ent);
  879. goto unlock_dev_srcu;
  880. }
  881. obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
  882. if (!obj) {
  883. ret = -ENOENT;
  884. goto free_slice_ent;
  885. }
  886. ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size);
  887. if (ret)
  888. goto put_bo;
  889. bo = to_qaic_bo(obj);
  890. ret = mutex_lock_interruptible(&bo->lock);
  891. if (ret)
  892. goto put_bo;
  893. if (bo->sliced) {
  894. ret = -EINVAL;
  895. goto unlock_bo;
  896. }
  897. dbc = &qdev->dbc[args->hdr.dbc_id];
  898. rcu_id = srcu_read_lock(&dbc->ch_lock);
  899. if (dbc->usr != usr) {
  900. ret = -EINVAL;
  901. goto unlock_ch_srcu;
  902. }
  903. if (dbc->id == qdev->ssr_dbc) {
  904. ret = -EPIPE;
  905. goto unlock_ch_srcu;
  906. }
  907. ret = qaic_prepare_bo(qdev, bo, &args->hdr);
  908. if (ret)
  909. goto unlock_ch_srcu;
  910. ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent);
  911. if (ret)
  912. goto unprepare_bo;
  913. if (args->hdr.dir == DMA_TO_DEVICE)
  914. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
  915. bo->sliced = true;
  916. list_add_tail(&bo->bo_list, &bo->dbc->bo_lists);
  917. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  918. mutex_unlock(&bo->lock);
  919. kfree(slice_ent);
  920. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  921. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  922. return 0;
  923. unprepare_bo:
  924. qaic_unprepare_bo(qdev, bo);
  925. unlock_ch_srcu:
  926. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  927. unlock_bo:
  928. mutex_unlock(&bo->lock);
  929. put_bo:
  930. drm_gem_object_put(obj);
  931. free_slice_ent:
  932. kfree(slice_ent);
  933. unlock_dev_srcu:
  934. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  935. unlock_usr_srcu:
  936. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  937. return ret;
  938. }
  /*
   * Free entries in a circular queue of @q_size elements, given the consumer
   * index @head and producer index @tail. One entry is always kept unused, so
   * an empty queue (head == tail) reports q_size - 1.
   */
  static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
  {
  	u32 gap = head - tail - 1;

  	/* head is ahead of tail: the free region does not wrap */
  	if (head > tail)
  		return gap;

  	/* otherwise the free region wraps around the end of the queue */
  	return gap + q_size;
  }
  946. static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
  947. u32 head, u32 *ptail)
  948. {
  949. struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
  950. struct dbc_req *reqs = slice->reqs;
  951. u32 tail = *ptail;
  952. u32 avail;
  953. avail = fifo_space_avail(head, tail, dbc->nelem);
  954. if (avail < slice->nents)
  955. return -EAGAIN;
  956. if (tail + slice->nents > dbc->nelem) {
  957. avail = dbc->nelem - tail;
  958. avail = min_t(u32, avail, slice->nents);
  959. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
  960. reqs += avail;
  961. avail = slice->nents - avail;
  962. if (avail)
  963. memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
  964. } else {
  965. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
  966. }
  967. *ptail = (tail + slice->nents) % dbc->nelem;
  968. return 0;
  969. }
  970. static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
  971. u64 resize, struct dma_bridge_chan *dbc, u32 head,
  972. u32 *ptail)
  973. {
  974. struct dbc_req *reqs = slice->reqs;
  975. struct dbc_req *last_req;
  976. u32 tail = *ptail;
  977. u64 last_bytes;
  978. u32 first_n;
  979. u32 avail;
  980. avail = fifo_space_avail(head, tail, dbc->nelem);
  981. /*
  982. * After this for loop is complete, first_n represents the index
  983. * of the last DMA request of this slice that needs to be
  984. * transferred after resizing and last_bytes represents DMA size
  985. * of that request.
  986. */
  987. last_bytes = resize;
  988. for (first_n = 0; first_n < slice->nents; first_n++)
  989. if (last_bytes > le32_to_cpu(reqs[first_n].len))
  990. last_bytes -= le32_to_cpu(reqs[first_n].len);
  991. else
  992. break;
  993. if (avail < (first_n + 1))
  994. return -EAGAIN;
  995. if (first_n) {
  996. if (tail + first_n > dbc->nelem) {
  997. avail = dbc->nelem - tail;
  998. avail = min_t(u32, avail, first_n);
  999. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
  1000. last_req = reqs + avail;
  1001. avail = first_n - avail;
  1002. if (avail)
  1003. memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
  1004. } else {
  1005. memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
  1006. }
  1007. }
  1008. /*
  1009. * Copy over the last entry. Here we need to adjust len to the left over
  1010. * size, and set src and dst to the entry it is copied to.
  1011. */
  1012. last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
  1013. memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
  1014. /*
  1015. * last_bytes holds size of a DMA segment, maximum DMA segment size is
  1016. * set to UINT_MAX by qaic and hence last_bytes can never exceed u32
  1017. * range. So, by down sizing we are not corrupting the value.
  1018. */
  1019. last_req->len = cpu_to_le32((u32)last_bytes);
  1020. last_req->src_addr = reqs[first_n].src_addr;
  1021. last_req->dest_addr = reqs[first_n].dest_addr;
  1022. if (!last_bytes)
  1023. /* Disable DMA transfer */
  1024. last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
  1025. *ptail = (tail + first_n + 1) % dbc->nelem;
  1026. return 0;
  1027. }
  1028. static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv,
  1029. struct qaic_execute_entry *exec, unsigned int count,
  1030. bool is_partial, struct dma_bridge_chan *dbc, u32 head,
  1031. u32 *tail)
  1032. {
  1033. struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
  1034. struct drm_gem_object *obj;
  1035. struct bo_slice *slice;
  1036. unsigned long flags;
  1037. struct qaic_bo *bo;
  1038. int i, j;
  1039. int ret;
  1040. for (i = 0; i < count; i++) {
  1041. /*
  1042. * ref count will be decremented when the transfer of this
  1043. * buffer is complete. It is inside dbc_irq_threaded_fn().
  1044. */
  1045. obj = drm_gem_object_lookup(file_priv,
  1046. is_partial ? pexec[i].handle : exec[i].handle);
  1047. if (!obj) {
  1048. ret = -ENOENT;
  1049. goto failed_to_send_bo;
  1050. }
  1051. bo = to_qaic_bo(obj);
  1052. ret = mutex_lock_interruptible(&bo->lock);
  1053. if (ret)
  1054. goto failed_to_send_bo;
  1055. if (!bo->sliced) {
  1056. ret = -EINVAL;
  1057. goto unlock_bo;
  1058. }
  1059. if (is_partial && pexec[i].resize > bo->base.size) {
  1060. ret = -EINVAL;
  1061. goto unlock_bo;
  1062. }
  1063. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1064. if (bo_queued(bo)) {
  1065. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1066. ret = -EINVAL;
  1067. goto unlock_bo;
  1068. }
  1069. bo->req_id = dbc->next_req_id++;
  1070. list_for_each_entry(slice, &bo->slices, slice) {
  1071. for (j = 0; j < slice->nents; j++)
  1072. slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
  1073. if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
  1074. /* Configure the slice for no DMA transfer */
  1075. ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
  1076. else if (is_partial && pexec[i].resize < slice->offset + slice->size)
  1077. /* Configure the slice to be partially DMA transferred */
  1078. ret = copy_partial_exec_reqs(qdev, slice,
  1079. pexec[i].resize - slice->offset, dbc,
  1080. head, tail);
  1081. else
  1082. ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
  1083. if (ret) {
  1084. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1085. goto unlock_bo;
  1086. }
  1087. }
  1088. reinit_completion(&bo->xfer_done);
  1089. list_add_tail(&bo->xfer_list, &dbc->xfer_list);
  1090. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1091. dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
  1092. mutex_unlock(&bo->lock);
  1093. }
  1094. return 0;
  1095. unlock_bo:
  1096. mutex_unlock(&bo->lock);
  1097. failed_to_send_bo:
  1098. if (likely(obj))
  1099. drm_gem_object_put(obj);
  1100. for (j = 0; j < i; j++) {
  1101. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1102. bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
  1103. obj = &bo->base;
  1104. list_del_init(&bo->xfer_list);
  1105. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1106. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
  1107. drm_gem_object_put(obj);
  1108. }
  1109. return ret;
  1110. }
  1111. static void update_profiling_data(struct drm_file *file_priv,
  1112. struct qaic_execute_entry *exec, unsigned int count,
  1113. bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level)
  1114. {
  1115. struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
  1116. struct drm_gem_object *obj;
  1117. struct qaic_bo *bo;
  1118. int i;
  1119. for (i = 0; i < count; i++) {
  1120. /*
  1121. * Since we already committed the BO to hardware, the only way
  1122. * this should fail is a pending signal. We can't cancel the
  1123. * submit to hardware, so we have to just skip the profiling
  1124. * data. In case the signal is not fatal to the process, we
  1125. * return success so that the user doesn't try to resubmit.
  1126. */
  1127. obj = drm_gem_object_lookup(file_priv,
  1128. is_partial ? pexec[i].handle : exec[i].handle);
  1129. if (!obj)
  1130. break;
  1131. bo = to_qaic_bo(obj);
  1132. bo->perf_stats.req_received_ts = received_ts;
  1133. bo->perf_stats.req_submit_ts = submit_ts;
  1134. bo->perf_stats.queue_level_before = queue_level;
  1135. queue_level += bo->total_slice_nents;
  1136. drm_gem_object_put(obj);
  1137. }
  1138. }
  1139. static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv,
  1140. bool is_partial)
  1141. {
  1142. struct qaic_execute *args = data;
  1143. struct qaic_execute_entry *exec;
  1144. struct dma_bridge_chan *dbc;
  1145. int usr_rcu_id, qdev_rcu_id;
  1146. struct qaic_device *qdev;
  1147. struct qaic_user *usr;
  1148. u64 received_ts;
  1149. u32 queue_level;
  1150. u64 submit_ts;
  1151. int rcu_id;
  1152. u32 head;
  1153. u32 tail;
  1154. u64 size;
  1155. int ret;
  1156. received_ts = ktime_get_ns();
  1157. size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
  1158. if (args->hdr.count == 0)
  1159. return -EINVAL;
  1160. exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
  1161. if (IS_ERR(exec))
  1162. return PTR_ERR(exec);
  1163. usr = file_priv->driver_priv;
  1164. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  1165. if (!usr->qddev) {
  1166. ret = -ENODEV;
  1167. goto unlock_usr_srcu;
  1168. }
  1169. qdev = usr->qddev->qdev;
  1170. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  1171. if (qdev->dev_state != QAIC_ONLINE) {
  1172. ret = -ENODEV;
  1173. goto unlock_dev_srcu;
  1174. }
  1175. if (args->hdr.dbc_id >= qdev->num_dbc) {
  1176. ret = -EINVAL;
  1177. goto unlock_dev_srcu;
  1178. }
  1179. dbc = &qdev->dbc[args->hdr.dbc_id];
  1180. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1181. if (!dbc->usr || dbc->usr->handle != usr->handle) {
  1182. ret = -EPERM;
  1183. goto release_ch_rcu;
  1184. }
  1185. if (dbc->id == qdev->ssr_dbc) {
  1186. ret = -EPIPE;
  1187. goto release_ch_rcu;
  1188. }
  1189. ret = mutex_lock_interruptible(&dbc->req_lock);
  1190. if (ret)
  1191. goto release_ch_rcu;
  1192. head = readl(dbc->dbc_base + REQHP_OFF);
  1193. tail = readl(dbc->dbc_base + REQTP_OFF);
  1194. if (head == U32_MAX || tail == U32_MAX) {
  1195. /* PCI link error */
  1196. ret = -ENODEV;
  1197. goto unlock_req_lock;
  1198. }
  1199. queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
  1200. ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
  1201. head, &tail);
  1202. if (ret)
  1203. goto unlock_req_lock;
  1204. /* Finalize commit to hardware */
  1205. submit_ts = ktime_get_ns();
  1206. writel(tail, dbc->dbc_base + REQTP_OFF);
  1207. mutex_unlock(&dbc->req_lock);
  1208. update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
  1209. submit_ts, queue_level);
  1210. if (datapath_polling)
  1211. schedule_work(&dbc->poll_work);
  1212. unlock_req_lock:
  1213. if (ret)
  1214. mutex_unlock(&dbc->req_lock);
  1215. release_ch_rcu:
  1216. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1217. unlock_dev_srcu:
  1218. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  1219. unlock_usr_srcu:
  1220. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  1221. kfree(exec);
  1222. return ret;
  1223. }
  1224. int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1225. {
  1226. return __qaic_execute_bo_ioctl(dev, data, file_priv, false);
  1227. }
  1228. int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1229. {
  1230. return __qaic_execute_bo_ioctl(dev, data, file_priv, true);
  1231. }
  1232. /*
  1233. * Our interrupt handling is a bit more complicated than a simple ideal, but
  1234. * sadly necessary.
  1235. *
  1236. * Each dbc has a completion queue. Entries in the queue correspond to DMA
  1237. * requests which the device has processed. The hardware already has a built
  1238. * in irq mitigation. When the device puts an entry into the queue, it will
  1239. * only trigger an interrupt if the queue was empty. Therefore, when adding
  1240. * the Nth event to a non-empty queue, the hardware doesn't trigger an
  1241. * interrupt. This means the host doesn't get additional interrupts signaling
  1242. * the same thing - the queue has something to process.
  1243. * This behavior can be overridden in the DMA request.
  1244. * This means that when the host receives an interrupt, it is required to
  1245. * drain the queue.
  1246. *
  1247. * This behavior is what NAPI attempts to accomplish, although we can't use
  1248. * NAPI as we don't have a netdev. We use threaded irqs instead.
  1249. *
  1250. * However, there is a situation where the host drains the queue fast enough
  1251. * that every event causes an interrupt. Typically this is not a problem as
  1252. * the rate of events would be low. However, that is not the case with
  1253. * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of
  1254. * lprnet, the host receives roughly 80k interrupts per second from the device
  1255. * (per /proc/interrupts). While NAPI documentation indicates the host should
  1256. * just chug along, sadly that behavior causes instability in some hosts.
  1257. *
  1258. * Therefore, we implement an interrupt disable scheme similar to NAPI. The
  1259. * key difference is that we will delay after draining the queue for a small
  1260. * time to allow additional events to come in via polling. Using the above
  1261. * lprnet workload, this reduces the number of interrupts processed from
  1262. * ~80k/sec to about 64 in 5 minutes and appears to solve the system
  1263. * instability.
  1264. */
  1265. irqreturn_t dbc_irq_handler(int irq, void *data)
  1266. {
  1267. struct dma_bridge_chan *dbc = data;
  1268. int rcu_id;
  1269. u32 head;
  1270. u32 tail;
  1271. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1272. if (datapath_polling) {
  1273. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1274. /*
  1275. * Normally datapath_polling will not have irqs enabled, but
  1276. * when running with only one MSI the interrupt is shared with
  1277. * MHI so it cannot be disabled. Return ASAP instead.
  1278. */
  1279. return IRQ_HANDLED;
  1280. }
  1281. if (!dbc->usr) {
  1282. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1283. return IRQ_HANDLED;
  1284. }
  1285. head = readl(dbc->dbc_base + RSPHP_OFF);
  1286. if (head == U32_MAX) { /* PCI link error */
  1287. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1288. return IRQ_NONE;
  1289. }
  1290. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1291. if (tail == U32_MAX) { /* PCI link error */
  1292. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1293. return IRQ_NONE;
  1294. }
  1295. if (head == tail) { /* queue empty */
  1296. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1297. return IRQ_NONE;
  1298. }
  1299. if (!dbc->qdev->single_msi)
  1300. disable_irq_nosync(irq);
  1301. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1302. return IRQ_WAKE_THREAD;
  1303. }
  1304. void qaic_irq_polling_work(struct work_struct *work)
  1305. {
  1306. struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
  1307. unsigned long flags;
  1308. int rcu_id;
  1309. u32 head;
  1310. u32 tail;
  1311. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1312. while (1) {
  1313. if (dbc->qdev->dev_state != QAIC_ONLINE) {
  1314. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1315. return;
  1316. }
  1317. if (!dbc->usr) {
  1318. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1319. return;
  1320. }
  1321. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1322. if (list_empty(&dbc->xfer_list)) {
  1323. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1324. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1325. return;
  1326. }
  1327. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1328. head = readl(dbc->dbc_base + RSPHP_OFF);
  1329. if (head == U32_MAX) { /* PCI link error */
  1330. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1331. return;
  1332. }
  1333. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1334. if (tail == U32_MAX) { /* PCI link error */
  1335. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1336. return;
  1337. }
  1338. if (head != tail) {
  1339. irq_wake_thread(dbc->irq, dbc);
  1340. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1341. return;
  1342. }
  1343. cond_resched();
  1344. usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us);
  1345. }
  1346. }
  1347. irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
  1348. {
  1349. struct dma_bridge_chan *dbc = data;
  1350. int event_count = NUM_EVENTS;
  1351. int delay_count = NUM_DELAYS;
  1352. struct qaic_device *qdev;
  1353. struct qaic_bo *bo, *i;
  1354. struct dbc_rsp *rsp;
  1355. unsigned long flags;
  1356. int rcu_id;
  1357. u16 status;
  1358. u16 req_id;
  1359. u32 head;
  1360. u32 tail;
  1361. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1362. qdev = dbc->qdev;
  1363. head = readl(dbc->dbc_base + RSPHP_OFF);
  1364. if (head == U32_MAX) /* PCI link error */
  1365. goto error_out;
  1366. read_fifo:
  1367. if (!event_count) {
  1368. event_count = NUM_EVENTS;
  1369. cond_resched();
  1370. }
  1371. /*
  1372. * if this channel isn't assigned or gets unassigned during processing
  1373. * we have nothing further to do
  1374. */
  1375. if (!dbc->usr)
  1376. goto error_out;
  1377. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1378. if (tail == U32_MAX) /* PCI link error */
  1379. goto error_out;
  1380. if (head == tail) { /* queue empty */
  1381. if (delay_count) {
  1382. --delay_count;
  1383. usleep_range(100, 200);
  1384. goto read_fifo; /* check for a new event */
  1385. }
  1386. goto normal_out;
  1387. }
  1388. delay_count = NUM_DELAYS;
  1389. while (head != tail) {
  1390. if (!event_count)
  1391. break;
  1392. --event_count;
  1393. rsp = dbc->rsp_q_base + head * sizeof(*rsp);
  1394. req_id = le16_to_cpu(rsp->req_id);
  1395. status = le16_to_cpu(rsp->status);
  1396. if (status)
  1397. pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status);
  1398. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1399. /*
  1400. * A BO can receive multiple interrupts, since a BO can be
  1401. * divided into multiple slices and a buffer receives as many
  1402. * interrupts as slices. So until it receives interrupts for
  1403. * all the slices we cannot mark that buffer complete.
  1404. */
  1405. list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) {
  1406. if (bo->req_id == req_id)
  1407. bo->nr_slice_xfer_done++;
  1408. else
  1409. continue;
  1410. if (bo->nr_slice_xfer_done < bo->nr_slice)
  1411. break;
  1412. /*
  1413. * At this point we have received all the interrupts for
  1414. * BO, which means BO execution is complete.
  1415. */
  1416. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
  1417. bo->nr_slice_xfer_done = 0;
  1418. list_del_init(&bo->xfer_list);
  1419. bo->perf_stats.req_processed_ts = ktime_get_ns();
  1420. complete_all(&bo->xfer_done);
  1421. drm_gem_object_put(&bo->base);
  1422. break;
  1423. }
  1424. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1425. head = (head + 1) % dbc->nelem;
  1426. }
  1427. /*
  1428. * Update the head pointer of response queue and let the device know
  1429. * that we have consumed elements from the queue.
  1430. */
  1431. writel(head, dbc->dbc_base + RSPHP_OFF);
  1432. /* elements might have been put in the queue while we were processing */
  1433. goto read_fifo;
  1434. normal_out:
  1435. if (!qdev->single_msi && likely(!datapath_polling))
  1436. enable_irq(irq);
  1437. else if (unlikely(datapath_polling))
  1438. schedule_work(&dbc->poll_work);
  1439. /* checking the fifo and enabling irqs is a race, missed event check */
  1440. tail = readl(dbc->dbc_base + RSPTP_OFF);
  1441. if (tail != U32_MAX && head != tail) {
  1442. if (!qdev->single_msi && likely(!datapath_polling))
  1443. disable_irq_nosync(irq);
  1444. goto read_fifo;
  1445. }
  1446. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1447. return IRQ_HANDLED;
  1448. error_out:
  1449. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1450. if (!qdev->single_msi && likely(!datapath_polling))
  1451. enable_irq(irq);
  1452. else if (unlikely(datapath_polling))
  1453. schedule_work(&dbc->poll_work);
  1454. return IRQ_HANDLED;
  1455. }
  1456. int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1457. {
  1458. struct qaic_wait *args = data;
  1459. int usr_rcu_id, qdev_rcu_id;
  1460. struct dma_bridge_chan *dbc;
  1461. struct drm_gem_object *obj;
  1462. struct qaic_device *qdev;
  1463. unsigned long timeout;
  1464. struct qaic_user *usr;
  1465. struct qaic_bo *bo;
  1466. int rcu_id;
  1467. int ret;
  1468. if (args->pad != 0)
  1469. return -EINVAL;
  1470. usr = file_priv->driver_priv;
  1471. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  1472. if (!usr->qddev) {
  1473. ret = -ENODEV;
  1474. goto unlock_usr_srcu;
  1475. }
  1476. qdev = usr->qddev->qdev;
  1477. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  1478. if (qdev->dev_state != QAIC_ONLINE) {
  1479. ret = -ENODEV;
  1480. goto unlock_dev_srcu;
  1481. }
  1482. if (args->dbc_id >= qdev->num_dbc) {
  1483. ret = -EINVAL;
  1484. goto unlock_dev_srcu;
  1485. }
  1486. dbc = &qdev->dbc[args->dbc_id];
  1487. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1488. if (dbc->usr != usr) {
  1489. ret = -EPERM;
  1490. goto unlock_ch_srcu;
  1491. }
  1492. if (dbc->id == qdev->ssr_dbc) {
  1493. ret = -EPIPE;
  1494. goto unlock_ch_srcu;
  1495. }
  1496. obj = drm_gem_object_lookup(file_priv, args->handle);
  1497. if (!obj) {
  1498. ret = -ENOENT;
  1499. goto unlock_ch_srcu;
  1500. }
  1501. bo = to_qaic_bo(obj);
  1502. timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms;
  1503. timeout = msecs_to_jiffies(timeout);
  1504. ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout);
  1505. if (!ret) {
  1506. ret = -ETIMEDOUT;
  1507. goto put_obj;
  1508. }
  1509. if (ret > 0)
  1510. ret = 0;
  1511. if (!dbc->usr)
  1512. ret = -EPERM;
  1513. if (dbc->id == qdev->ssr_dbc)
  1514. ret = -EPIPE;
  1515. put_obj:
  1516. drm_gem_object_put(obj);
  1517. unlock_ch_srcu:
  1518. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1519. unlock_dev_srcu:
  1520. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  1521. unlock_usr_srcu:
  1522. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  1523. return ret;
  1524. }
  1525. int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1526. {
  1527. struct qaic_perf_stats_entry *ent = NULL;
  1528. struct qaic_perf_stats *args = data;
  1529. int usr_rcu_id, qdev_rcu_id;
  1530. struct drm_gem_object *obj;
  1531. struct qaic_device *qdev;
  1532. struct qaic_user *usr;
  1533. struct qaic_bo *bo;
  1534. int ret = 0;
  1535. int i;
  1536. usr = file_priv->driver_priv;
  1537. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  1538. if (!usr->qddev) {
  1539. ret = -ENODEV;
  1540. goto unlock_usr_srcu;
  1541. }
  1542. qdev = usr->qddev->qdev;
  1543. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  1544. if (qdev->dev_state != QAIC_ONLINE) {
  1545. ret = -ENODEV;
  1546. goto unlock_dev_srcu;
  1547. }
  1548. if (args->hdr.dbc_id >= qdev->num_dbc) {
  1549. ret = -EINVAL;
  1550. goto unlock_dev_srcu;
  1551. }
  1552. ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
  1553. if (IS_ERR(ent)) {
  1554. ret = PTR_ERR(ent);
  1555. goto unlock_dev_srcu;
  1556. }
  1557. for (i = 0; i < args->hdr.count; i++) {
  1558. obj = drm_gem_object_lookup(file_priv, ent[i].handle);
  1559. if (!obj) {
  1560. ret = -ENOENT;
  1561. goto free_ent;
  1562. }
  1563. bo = to_qaic_bo(obj);
  1564. if (!bo->sliced) {
  1565. drm_gem_object_put(obj);
  1566. ret = -EINVAL;
  1567. goto free_ent;
  1568. }
  1569. if (bo->dbc->id != args->hdr.dbc_id) {
  1570. drm_gem_object_put(obj);
  1571. ret = -EINVAL;
  1572. goto free_ent;
  1573. }
  1574. /*
  1575. * perf stats ioctl is called before wait ioctl is complete then
  1576. * the latency information is invalid.
  1577. */
  1578. if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) {
  1579. ent[i].device_latency_us = 0;
  1580. } else {
  1581. ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts -
  1582. bo->perf_stats.req_submit_ts), 1000);
  1583. }
  1584. ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts -
  1585. bo->perf_stats.req_received_ts), 1000);
  1586. ent[i].queue_level_before = bo->perf_stats.queue_level_before;
  1587. ent[i].num_queue_element = bo->total_slice_nents;
  1588. drm_gem_object_put(obj);
  1589. }
  1590. if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent)))
  1591. ret = -EFAULT;
  1592. free_ent:
  1593. kfree(ent);
  1594. unlock_dev_srcu:
  1595. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  1596. unlock_usr_srcu:
  1597. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  1598. return ret;
  1599. }
  1600. static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo)
  1601. {
  1602. qaic_free_slices_bo(bo);
  1603. qaic_unprepare_bo(qdev, bo);
  1604. qaic_init_bo(bo, true);
  1605. list_del(&bo->bo_list);
  1606. drm_gem_object_put(&bo->base);
  1607. }
  1608. int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
  1609. {
  1610. struct qaic_detach_slice *args = data;
  1611. int rcu_id, usr_rcu_id, qdev_rcu_id;
  1612. struct dma_bridge_chan *dbc;
  1613. struct drm_gem_object *obj;
  1614. struct qaic_device *qdev;
  1615. struct qaic_user *usr;
  1616. unsigned long flags;
  1617. struct qaic_bo *bo;
  1618. int ret;
  1619. if (args->pad != 0)
  1620. return -EINVAL;
  1621. usr = file_priv->driver_priv;
  1622. usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
  1623. if (!usr->qddev) {
  1624. ret = -ENODEV;
  1625. goto unlock_usr_srcu;
  1626. }
  1627. qdev = usr->qddev->qdev;
  1628. qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
  1629. if (qdev->dev_state != QAIC_ONLINE) {
  1630. ret = -ENODEV;
  1631. goto unlock_dev_srcu;
  1632. }
  1633. obj = drm_gem_object_lookup(file_priv, args->handle);
  1634. if (!obj) {
  1635. ret = -ENOENT;
  1636. goto unlock_dev_srcu;
  1637. }
  1638. bo = to_qaic_bo(obj);
  1639. ret = mutex_lock_interruptible(&bo->lock);
  1640. if (ret)
  1641. goto put_bo;
  1642. if (!bo->sliced) {
  1643. ret = -EINVAL;
  1644. goto unlock_bo;
  1645. }
  1646. dbc = bo->dbc;
  1647. rcu_id = srcu_read_lock(&dbc->ch_lock);
  1648. if (dbc->usr != usr) {
  1649. ret = -EINVAL;
  1650. goto unlock_ch_srcu;
  1651. }
  1652. /* Check if BO is committed to H/W for DMA */
  1653. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1654. if (bo_queued(bo)) {
  1655. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1656. ret = -EBUSY;
  1657. goto unlock_ch_srcu;
  1658. }
  1659. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1660. detach_slice_bo(qdev, bo);
  1661. unlock_ch_srcu:
  1662. srcu_read_unlock(&dbc->ch_lock, rcu_id);
  1663. unlock_bo:
  1664. mutex_unlock(&bo->lock);
  1665. put_bo:
  1666. drm_gem_object_put(obj);
  1667. unlock_dev_srcu:
  1668. srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
  1669. unlock_usr_srcu:
  1670. srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
  1671. return ret;
  1672. }
  1673. static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
  1674. {
  1675. unsigned long flags;
  1676. struct qaic_bo *bo;
  1677. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1678. while (!list_empty(&dbc->xfer_list)) {
  1679. bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
  1680. list_del_init(&bo->xfer_list);
  1681. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1682. bo->nr_slice_xfer_done = 0;
  1683. bo->req_id = 0;
  1684. bo->perf_stats.req_received_ts = 0;
  1685. bo->perf_stats.req_submit_ts = 0;
  1686. bo->perf_stats.req_processed_ts = 0;
  1687. bo->perf_stats.queue_level_before = 0;
  1688. dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
  1689. complete_all(&bo->xfer_done);
  1690. drm_gem_object_put(&bo->base);
  1691. spin_lock_irqsave(&dbc->xfer_lock, flags);
  1692. }
  1693. spin_unlock_irqrestore(&dbc->xfer_lock, flags);
  1694. }
  1695. static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
  1696. {
  1697. empty_xfer_list(qdev, dbc);
  1698. synchronize_srcu(&dbc->ch_lock);
  1699. /*
  1700. * Threads holding channel lock, may add more elements in the xfer_list.
  1701. * Flush out these elements from xfer_list.
  1702. */
  1703. empty_xfer_list(qdev, dbc);
  1704. }
  1705. int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
  1706. {
  1707. if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
  1708. return -EPERM;
  1709. qdev->dbc[dbc_id].usr = NULL;
  1710. synchronize_srcu(&qdev->dbc[dbc_id].ch_lock);
  1711. return 0;
  1712. }
  1713. /**
  1714. * enable_dbc - Enable the DBC. DBCs are disabled by removing the context of
  1715. * user. Add user context back to DBC to enable it. This function trusts the
  1716. * DBC ID passed and expects the DBC to be disabled.
  1717. * @qdev: qaic device handle
  1718. * @dbc_id: ID of the DBC
  1719. * @usr: User context
  1720. */
  1721. void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
  1722. {
  1723. qdev->dbc[dbc_id].usr = usr;
  1724. }
  1725. void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
  1726. {
  1727. struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
  1728. dbc->usr = NULL;
  1729. sync_empty_xfer_list(qdev, dbc);
  1730. }
  1731. void release_dbc(struct qaic_device *qdev, u32 dbc_id)
  1732. {
  1733. struct qaic_bo *bo, *bo_temp;
  1734. struct dma_bridge_chan *dbc;
  1735. dbc = &qdev->dbc[dbc_id];
  1736. if (!dbc->in_use)
  1737. return;
  1738. wakeup_dbc(qdev, dbc_id);
  1739. dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr);
  1740. dbc->total_size = 0;
  1741. dbc->req_q_base = NULL;
  1742. dbc->dma_addr = 0;
  1743. dbc->nelem = 0;
  1744. dbc->usr = NULL;
  1745. list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
  1746. drm_gem_object_get(&bo->base);
  1747. mutex_lock(&bo->lock);
  1748. detach_slice_bo(qdev, bo);
  1749. mutex_unlock(&bo->lock);
  1750. drm_gem_object_put(&bo->base);
  1751. }
  1752. dbc->in_use = false;
  1753. wake_up(&dbc->dbc_release);
  1754. }
  1755. void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
  1756. {
  1757. if (!dbc || !head || !tail)
  1758. return;
  1759. *head = readl(dbc->dbc_base + REQHP_OFF);
  1760. *tail = readl(dbc->dbc_base + REQTP_OFF);
  1761. }
  1762. /*
  1763. * qaic_dbc_enter_ssr - Prepare to enter in sub system reset(SSR) for given DBC ID.
  1764. * @qdev: qaic device handle
  1765. * @dbc_id: ID of the DBC which will enter SSR
  1766. *
  1767. * The device will automatically deactivate the workload as not
  1768. * all errors can be silently recovered. The user will be
  1769. * notified and will need to decide the required recovery
  1770. * action to take.
  1771. */
  1772. void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
  1773. {
  1774. qdev->ssr_dbc = dbc_id;
  1775. release_dbc(qdev, dbc_id);
  1776. }
  1777. /*
  1778. * qaic_dbc_exit_ssr - Prepare to exit from sub system reset(SSR) for given DBC ID.
  1779. * @qdev: qaic device handle
  1780. *
  1781. * The DBC returns to an operational state and begins accepting work after exiting SSR.
  1782. */
  1783. void qaic_dbc_exit_ssr(struct qaic_device *qdev)
  1784. {
  1785. qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
  1786. }