dma-fence.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Fence mechanism for dma-buf and to allow for asynchronous dma access
  4. *
  5. * Copyright (C) 2012 Canonical Ltd
  6. * Copyright (C) 2012 Texas Instruments
  7. *
  8. * Authors:
  9. * Rob Clark <robdclark@gmail.com>
  10. * Maarten Lankhorst <maarten.lankhorst@canonical.com>
  11. */
  12. #include <linux/slab.h>
  13. #include <linux/export.h>
  14. #include <linux/atomic.h>
  15. #include <linux/dma-fence.h>
  16. #include <linux/sched/signal.h>
  17. #include <linux/seq_file.h>
  18. #define CREATE_TRACE_POINTS
  19. #include <trace/events/dma_fence.h>
  20. EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
  21. EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
  22. EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
  /* Spinlock passed to dma_fence_init() for every stub fence set up in this file. */
  static DEFINE_SPINLOCK(dma_fence_stub_lock);
  /* The shared stub fence handed out by dma_fence_get_stub(). */
  static struct dma_fence dma_fence_stub;
  /*
   * fence context counter: each execution context should have its own
   * fence context, this allows checking if fences belong to the same
   * context or not. One device can have multiple separate contexts,
   * and they're used if some engine can run independently of another.
   *
   * The counter starts at 1; the stub fences in this file are initialised
   * with context 0.
   */
  static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);
  32. /**
  33. * DOC: DMA fences overview
  34. *
  35. * DMA fences, represented by &struct dma_fence, are the kernel internal
  36. * synchronization primitive for DMA operations like GPU rendering, video
  37. * encoding/decoding, or displaying buffers on a screen.
  38. *
  39. * A fence is initialized using dma_fence_init() and completed using
  40. * dma_fence_signal(). Fences are associated with a context, allocated through
  41. * dma_fence_context_alloc(), and all fences on the same context are
  42. * fully ordered.
  43. *
  44. * Since the purpose of fences is to facilitate cross-device and
  45. * cross-application synchronization, there are multiple ways to use one:
  46. *
  47. * - Individual fences can be exposed as a &sync_file, accessed as a file
  48. * descriptor from userspace, created by calling sync_file_create(). This is
  49. * called explicit fencing, since userspace passes around explicit
  50. * synchronization points.
  51. *
  52. * - Some subsystems also have their own explicit fencing primitives, like
  53. * &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying
  54. * fence to be updated.
  55. *
  56. * - Then there's also implicit fencing, where the synchronization points are
  57. * implicitly passed around as part of shared &dma_buf instances. Such
  58. * implicit fences are stored in &struct dma_resv through the
  59. * &dma_buf.resv pointer.
  60. */
  61. /**
  62. * DOC: fence cross-driver contract
  63. *
  64. * Since &dma_fence provides a cross driver contract, all drivers must follow the
  65. * same rules:
  66. *
  67. * * Fences must complete in a reasonable time. Fences which represent kernels
  68. * and shaders submitted by userspace, which could run forever, must be backed
  69. * up by timeout and gpu hang recovery code. Minimally that code must prevent
  70. * further command submission and force complete all in-flight fences, e.g.
  71. * when the driver or hardware do not support gpu reset, or if the gpu reset
  72. * failed for some reason. Ideally the driver supports gpu recovery which only
  73. * affects the offending userspace context, and no other userspace
  74. * submissions.
  75. *
  76. * * Drivers may have different ideas of what completion within a reasonable
  77. * time means. Some hang recovery code uses a fixed timeout, others a mix
  78. * between observing forward progress and increasingly strict timeouts.
  79. * Drivers should not try to second guess timeout handling of fences from
  80. * other drivers.
  81. *
  82. * * To ensure there are no deadlocks of dma_fence_wait() against other locks
  83. * drivers should annotate all code required to reach dma_fence_signal(),
  84. * which completes the fences, with dma_fence_begin_signalling() and
  85. * dma_fence_end_signalling().
  86. *
  87. * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
  88. * This means any code required for fence completion cannot acquire a
  89. * &dma_resv lock. Note that this also pulls in the entire established
  90. * locking hierarchy around dma_resv_lock() and dma_resv_unlock().
  91. *
  92. * * Drivers are allowed to call dma_fence_wait() from their &shrinker
  93. * callbacks. This means any code required for fence completion cannot
  94. * allocate memory with GFP_KERNEL.
  95. *
  96. * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
  97. * respectively &mmu_interval_notifier callbacks. This means any code required
  98. * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
  99. * Only GFP_ATOMIC is permissible, which might fail.
  100. *
  101. * Note that only GPU drivers have a reasonable excuse for both requiring
  102. * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
  103. * track asynchronous compute work using &dma_fence. No driver outside of
  104. * drivers/gpu should ever call dma_fence_wait() in such contexts.
  105. */
  /*
   * Name callback used by the stub fence ops for both the driver name and the
   * timeline name. @fence is ignored; the result is always "stub".
   */
  static const char *dma_fence_stub_get_name(struct dma_fence *fence)
  {
  	static const char stub_name[] = "stub";

  	return stub_name;
  }
  /*
   * Ops table for the stub fences: only the two name callbacks are provided,
   * both backed by dma_fence_stub_get_name().
   */
  static const struct dma_fence_ops dma_fence_stub_ops = {
  	.get_driver_name = dma_fence_stub_get_name,
  	.get_timeline_name = dma_fence_stub_get_name,
  };
  114. static int __init dma_fence_init_stub(void)
  115. {
  116. dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops,
  117. &dma_fence_stub_lock, 0, 0);
  118. set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
  119. &dma_fence_stub.flags);
  120. dma_fence_signal(&dma_fence_stub);
  121. return 0;
  122. }
  123. subsys_initcall(dma_fence_init_stub);
  124. /**
  125. * dma_fence_get_stub - return a signaled fence
  126. *
  127. * Return a stub fence which is already signaled. The fence's timestamp
  128. * corresponds to the initialisation time of the linux kernel.
  129. */
  130. struct dma_fence *dma_fence_get_stub(void)
  131. {
  132. return dma_fence_get(&dma_fence_stub);
  133. }
  134. EXPORT_SYMBOL(dma_fence_get_stub);
  135. /**
  136. * dma_fence_allocate_private_stub - return a private, signaled fence
  137. * @timestamp: timestamp when the fence was signaled
  138. *
  139. * Return a newly allocated and signaled stub fence.
  140. */
  141. struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp)
  142. {
  143. struct dma_fence *fence;
  144. fence = kzalloc_obj(*fence);
  145. if (fence == NULL)
  146. return NULL;
  147. dma_fence_init(fence,
  148. &dma_fence_stub_ops,
  149. &dma_fence_stub_lock,
  150. 0, 0);
  151. set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
  152. &fence->flags);
  153. dma_fence_signal_timestamp(fence, timestamp);
  154. return fence;
  155. }
  156. EXPORT_SYMBOL(dma_fence_allocate_private_stub);
  157. /**
  158. * dma_fence_context_alloc - allocate an array of fence contexts
  159. * @num: amount of contexts to allocate
  160. *
  161. * This function will return the first index of the number of fence contexts
  162. * allocated. The fence context is used for setting &dma_fence.context to a
  163. * unique number by passing the context to dma_fence_init().
  164. */
  165. u64 dma_fence_context_alloc(unsigned num)
  166. {
  167. WARN_ON(!num);
  168. return atomic64_fetch_add(num, &dma_fence_context_counter);
  169. }
  170. EXPORT_SYMBOL(dma_fence_context_alloc);
  171. /**
  172. * DOC: fence signalling annotation
  173. *
  174. * Proving correctness of all the kernel code around &dma_fence through code
  175. * review and testing is tricky for a few reasons:
  176. *
  177. * * It is a cross-driver contract, and therefore all drivers must follow the
  178. * same rules for lock nesting order, calling contexts for various functions
  179. * and anything else significant for in-kernel interfaces. But it is also
  180. * impossible to test all drivers in a single machine, hence brute-force N vs.
  181. * N testing of all combinations is impossible. Even just limiting to the
  182. * possible combinations is infeasible.
  183. *
  184. * * There is an enormous amount of driver code involved. For render drivers
  185. * there's the tail of command submission, after fences are published,
  186. * scheduler code, interrupt and workers to process job completion,
  187. * and timeout, gpu reset and gpu hang recovery code. Plus for integration
  188. * with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
  189. * and &shrinker. For modesetting drivers there's the commit tail functions
  190. * between when fences for an atomic modeset are published, and when the
  191. * corresponding vblank completes, including any interrupt processing and
  192. * related workers. Auditing all that code, across all drivers, is not
  193. * feasible.
  194. *
  195. * * Due to how many other subsystems are involved and the locking hierarchies
  196. * this pulls in there is extremely thin wiggle-room for driver-specific
  197. * differences. &dma_fence interacts with almost all of the core memory
  198. * handling through page fault handlers via &dma_resv, dma_resv_lock() and
  199. * dma_resv_unlock(). On the other side it also interacts through all
  200. * allocation sites through &mmu_notifier and &shrinker.
  201. *
  202. * Furthermore lockdep does not handle cross-release dependencies, which means
  203. * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
  204. * at runtime with some quick testing. The simplest example is one thread
  205. * waiting on a &dma_fence while holding a lock::
  206. *
  207. * lock(A);
  208. * dma_fence_wait(B);
  209. * unlock(A);
  210. *
  211. * while the other thread is stuck trying to acquire the same lock, which
  212. * prevents it from signalling the fence the previous thread is stuck waiting
  213. * on::
  214. *
  215. * lock(A);
  216. * unlock(A);
  217. * dma_fence_signal(B);
  218. *
  219. * By manually annotating all code relevant to signalling a &dma_fence we can
  220. * teach lockdep about these dependencies, which also helps with the validation
  221. * headache since now lockdep can check all the rules for us::
  222. *
  223. * cookie = dma_fence_begin_signalling();
  224. * lock(A);
  225. * unlock(A);
  226. * dma_fence_signal(B);
  227. * dma_fence_end_signalling(cookie);
  228. *
  229. * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
  230. * annotate critical sections the following rules need to be observed:
  231. *
  232. * * All code necessary to complete a &dma_fence must be annotated, from the
  233. * point where a fence is accessible to other threads, to the point where
  234. * dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
  235. * and due to the very strict rules and many corner cases it is infeasible to
  236. * catch these just with review or normal stress testing.
  237. *
  238. * * &struct dma_resv deserves a special note, since the readers are only
  239. * protected by rcu. This means the signalling critical section starts as soon
  240. * as the new fences are installed, even before dma_resv_unlock() is called.
  241. *
  242. * * The only exceptions are fast paths and opportunistic signalling code, which
  243. * calls dma_fence_signal() purely as an optimization, but is not required to
  244. * guarantee completion of a &dma_fence. The usual example is a wait IOCTL
  245. * which calls dma_fence_signal(), while the mandatory completion path goes
  246. * through a hardware interrupt and possible job completion worker.
  247. *
  248. * * To aid composability of code, the annotations can be freely nested, as long
  249. * as the overall locking hierarchy is consistent. The annotations also work
  250. * both in interrupt and process context. Due to implementation details this
  251. * requires that callers pass an opaque cookie from
  252. * dma_fence_begin_signalling() to dma_fence_end_signalling().
  253. *
  254. * * Validation against the cross driver contract is implemented by priming
  255. * lockdep with the relevant hierarchy at boot-up. This means even just
  256. * testing with a single device is enough to validate a driver, at least as
  257. * far as deadlocks with dma_fence_wait() against dma_fence_signal() are
  258. * concerned.
  259. */
  260. #ifdef CONFIG_LOCKDEP
  /*
   * Lockdep map used by dma_fence_begin_signalling(),
   * dma_fence_end_signalling() and __dma_fence_might_wait() to model fence
   * signalling critical sections.
   */
  static struct lockdep_map dma_fence_lockdep_map = {
  	.name = "dma_fence_map"
  };
  264. /**
  265. * dma_fence_begin_signalling - begin a critical DMA fence signalling section
  266. *
  267. * Drivers should use this to annotate the beginning of any code section
  268. * required to eventually complete &dma_fence by calling dma_fence_signal().
  269. *
  270. * The end of these critical sections are annotated with
  271. * dma_fence_end_signalling().
  272. *
  273. * Returns:
  274. *
  275. * Opaque cookie needed by the implementation, which needs to be passed to
  276. * dma_fence_end_signalling().
  277. */
  278. bool dma_fence_begin_signalling(void)
  279. {
  280. /* explicitly nesting ... */
  281. if (lock_is_held_type(&dma_fence_lockdep_map, 1))
  282. return true;
  283. /* rely on might_sleep check for soft/hardirq locks */
  284. if (in_atomic())
  285. return true;
  286. /* ... and non-recursive successful read_trylock */
  287. lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_);
  288. return false;
  289. }
  290. EXPORT_SYMBOL(dma_fence_begin_signalling);
  291. /**
  292. * dma_fence_end_signalling - end a critical DMA fence signalling section
  293. * @cookie: opaque cookie from dma_fence_begin_signalling()
  294. *
  295. * Closes a critical section annotation opened by dma_fence_begin_signalling().
  296. */
  297. void dma_fence_end_signalling(bool cookie)
  298. {
  299. if (cookie)
  300. return;
  301. lock_release(&dma_fence_lockdep_map, _RET_IP_);
  302. }
  303. EXPORT_SYMBOL(dma_fence_end_signalling);
  304. void __dma_fence_might_wait(void)
  305. {
  306. bool tmp;
  307. tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
  308. if (tmp)
  309. lock_release(&dma_fence_lockdep_map, _THIS_IP_);
  310. lock_map_acquire(&dma_fence_lockdep_map);
  311. lock_map_release(&dma_fence_lockdep_map);
  312. if (tmp)
  313. lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_);
  314. }
  315. #endif
  316. /**
  317. * dma_fence_signal_timestamp_locked - signal completion of a fence
  318. * @fence: the fence to signal
  319. * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
  320. *
  321. * Signal completion for software callbacks on a fence, this will unblock
  322. * dma_fence_wait() calls and run all the callbacks added with
  323. * dma_fence_add_callback(). Can be called multiple times, but since a fence
  324. * can only go from the unsignaled to the signaled state and not back, it will
  325. * only be effective the first time. Set the timestamp provided as the fence
  326. * signal timestamp.
  327. *
  328. * Unlike dma_fence_signal_timestamp(), this function must be called with
  329. * &dma_fence.lock held.
  330. */
  331. void dma_fence_signal_timestamp_locked(struct dma_fence *fence,
  332. ktime_t timestamp)
  333. {
  334. struct dma_fence_cb *cur, *tmp;
  335. struct list_head cb_list;
  336. lockdep_assert_held(fence->lock);
  337. if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
  338. &fence->flags)))
  339. return;
  340. /* Stash the cb_list before replacing it with the timestamp */
  341. list_replace(&fence->cb_list, &cb_list);
  342. fence->timestamp = timestamp;
  343. set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
  344. trace_dma_fence_signaled(fence);
  345. list_for_each_entry_safe(cur, tmp, &cb_list, node) {
  346. INIT_LIST_HEAD(&cur->node);
  347. cur->func(fence, cur);
  348. }
  349. }
  350. EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
  351. /**
  352. * dma_fence_signal_timestamp - signal completion of a fence
  353. * @fence: the fence to signal
  354. * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
  355. *
  356. * Signal completion for software callbacks on a fence, this will unblock
  357. * dma_fence_wait() calls and run all the callbacks added with
  358. * dma_fence_add_callback(). Can be called multiple times, but since a fence
  359. * can only go from the unsignaled to the signaled state and not back, it will
  360. * only be effective the first time. Set the timestamp provided as the fence
  361. * signal timestamp.
  362. */
  363. void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
  364. {
  365. unsigned long flags;
  366. if (WARN_ON(!fence))
  367. return;
  368. spin_lock_irqsave(fence->lock, flags);
  369. dma_fence_signal_timestamp_locked(fence, timestamp);
  370. spin_unlock_irqrestore(fence->lock, flags);
  371. }
  372. EXPORT_SYMBOL(dma_fence_signal_timestamp);
  373. /**
  374. * dma_fence_signal_locked - signal completion of a fence
  375. * @fence: the fence to signal
  376. *
  377. * Signal completion for software callbacks on a fence, this will unblock
  378. * dma_fence_wait() calls and run all the callbacks added with
  379. * dma_fence_add_callback(). Can be called multiple times, but since a fence
  380. * can only go from the unsignaled to the signaled state and not back, it will
  381. * only be effective the first time.
  382. *
  383. * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
  384. * held.
  385. */
  386. void dma_fence_signal_locked(struct dma_fence *fence)
  387. {
  388. dma_fence_signal_timestamp_locked(fence, ktime_get());
  389. }
  390. EXPORT_SYMBOL(dma_fence_signal_locked);
  391. /**
  392. * dma_fence_check_and_signal_locked - signal the fence if it's not yet signaled
  393. * @fence: the fence to check and signal
  394. *
  395. * Checks whether a fence was signaled and signals it if it was not yet signaled.
  396. *
  397. * Unlike dma_fence_check_and_signal(), this function must be called with
  398. * &struct dma_fence.lock being held.
  399. *
  400. * Return: true if fence has been signaled already, false otherwise.
  401. */
  402. bool dma_fence_check_and_signal_locked(struct dma_fence *fence)
  403. {
  404. bool ret;
  405. ret = dma_fence_test_signaled_flag(fence);
  406. dma_fence_signal_locked(fence);
  407. return ret;
  408. }
  409. EXPORT_SYMBOL(dma_fence_check_and_signal_locked);
  410. /**
  411. * dma_fence_check_and_signal - signal the fence if it's not yet signaled
  412. * @fence: the fence to check and signal
  413. *
  414. * Checks whether a fence was signaled and signals it if it was not yet signaled.
  415. * All this is done in a race-free manner.
  416. *
  417. * Return: true if fence has been signaled already, false otherwise.
  418. */
  419. bool dma_fence_check_and_signal(struct dma_fence *fence)
  420. {
  421. unsigned long flags;
  422. bool ret;
  423. spin_lock_irqsave(fence->lock, flags);
  424. ret = dma_fence_check_and_signal_locked(fence);
  425. spin_unlock_irqrestore(fence->lock, flags);
  426. return ret;
  427. }
  428. EXPORT_SYMBOL(dma_fence_check_and_signal);
  429. /**
  430. * dma_fence_signal - signal completion of a fence
  431. * @fence: the fence to signal
  432. *
  433. * Signal completion for software callbacks on a fence, this will unblock
  434. * dma_fence_wait() calls and run all the callbacks added with
  435. * dma_fence_add_callback(). Can be called multiple times, but since a fence
  436. * can only go from the unsignaled to the signaled state and not back, it will
  437. * only be effective the first time.
  438. */
  439. void dma_fence_signal(struct dma_fence *fence)
  440. {
  441. unsigned long flags;
  442. bool tmp;
  443. if (WARN_ON(!fence))
  444. return;
  445. tmp = dma_fence_begin_signalling();
  446. spin_lock_irqsave(fence->lock, flags);
  447. dma_fence_signal_timestamp_locked(fence, ktime_get());
  448. spin_unlock_irqrestore(fence->lock, flags);
  449. dma_fence_end_signalling(tmp);
  450. }
  451. EXPORT_SYMBOL(dma_fence_signal);
  452. /**
  453. * dma_fence_wait_timeout - sleep until the fence gets signaled
  454. * or until timeout elapses
  455. * @fence: the fence to wait on
  456. * @intr: if true, do an interruptible wait
  457. * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  458. *
  459. * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
  460. * remaining timeout in jiffies on success. Other error values may be
  461. * returned on custom implementations.
  462. *
  463. * Performs a synchronous wait on this fence. It is assumed the caller
  464. * directly or indirectly (buf-mgr between reservation and committing)
  465. * holds a reference to the fence, otherwise the fence might be
  466. * freed before return, resulting in undefined behavior.
  467. *
  468. * See also dma_fence_wait() and dma_fence_wait_any_timeout().
  469. */
  470. signed long
  471. dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
  472. {
  473. signed long ret;
  474. if (WARN_ON(timeout < 0))
  475. return -EINVAL;
  476. might_sleep();
  477. __dma_fence_might_wait();
  478. dma_fence_enable_sw_signaling(fence);
  479. if (trace_dma_fence_wait_start_enabled()) {
  480. rcu_read_lock();
  481. trace_dma_fence_wait_start(fence);
  482. rcu_read_unlock();
  483. }
  484. if (fence->ops->wait)
  485. ret = fence->ops->wait(fence, intr, timeout);
  486. else
  487. ret = dma_fence_default_wait(fence, intr, timeout);
  488. if (trace_dma_fence_wait_end_enabled()) {
  489. rcu_read_lock();
  490. trace_dma_fence_wait_end(fence);
  491. rcu_read_unlock();
  492. }
  493. return ret;
  494. }
  495. EXPORT_SYMBOL(dma_fence_wait_timeout);
  496. /**
  497. * dma_fence_release - default release function for fences
  498. * @kref: &dma_fence.recfount
  499. *
  500. * This is the default release functions for &dma_fence. Drivers shouldn't call
  501. * this directly, but instead call dma_fence_put().
  502. */
  503. void dma_fence_release(struct kref *kref)
  504. {
  505. struct dma_fence *fence =
  506. container_of(kref, struct dma_fence, refcount);
  507. rcu_read_lock();
  508. trace_dma_fence_destroy(fence);
  509. if (!list_empty(&fence->cb_list) &&
  510. !dma_fence_test_signaled_flag(fence)) {
  511. const char __rcu *timeline;
  512. const char __rcu *driver;
  513. unsigned long flags;
  514. driver = dma_fence_driver_name(fence);
  515. timeline = dma_fence_timeline_name(fence);
  516. WARN(1,
  517. "Fence %s:%s:%llx:%llx released with pending signals!\n",
  518. rcu_dereference(driver), rcu_dereference(timeline),
  519. fence->context, fence->seqno);
  520. /*
  521. * Failed to signal before release, likely a refcounting issue.
  522. *
  523. * This should never happen, but if it does make sure that we
  524. * don't leave chains dangling. We set the error flag first
  525. * so that the callbacks know this signal is due to an error.
  526. */
  527. spin_lock_irqsave(fence->lock, flags);
  528. fence->error = -EDEADLK;
  529. dma_fence_signal_locked(fence);
  530. spin_unlock_irqrestore(fence->lock, flags);
  531. }
  532. rcu_read_unlock();
  533. if (fence->ops->release)
  534. fence->ops->release(fence);
  535. else
  536. dma_fence_free(fence);
  537. }
  538. EXPORT_SYMBOL(dma_fence_release);
  539. /**
  540. * dma_fence_free - default release function for &dma_fence.
  541. * @fence: fence to release
  542. *
  543. * This is the default implementation for &dma_fence_ops.release. It calls
  544. * kfree_rcu() on @fence.
  545. */
  546. void dma_fence_free(struct dma_fence *fence)
  547. {
  548. kfree_rcu(fence, rcu);
  549. }
  550. EXPORT_SYMBOL(dma_fence_free);
  551. static bool __dma_fence_enable_signaling(struct dma_fence *fence)
  552. {
  553. bool was_set;
  554. lockdep_assert_held(fence->lock);
  555. was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
  556. &fence->flags);
  557. if (dma_fence_test_signaled_flag(fence))
  558. return false;
  559. if (!was_set && fence->ops->enable_signaling) {
  560. trace_dma_fence_enable_signal(fence);
  561. if (!fence->ops->enable_signaling(fence)) {
  562. dma_fence_signal_locked(fence);
  563. return false;
  564. }
  565. }
  566. return true;
  567. }
  568. /**
  569. * dma_fence_enable_sw_signaling - enable signaling on fence
  570. * @fence: the fence to enable
  571. *
  572. * This will request for sw signaling to be enabled, to make the fence
  573. * complete as soon as possible. This calls &dma_fence_ops.enable_signaling
  574. * internally.
  575. */
  576. void dma_fence_enable_sw_signaling(struct dma_fence *fence)
  577. {
  578. unsigned long flags;
  579. spin_lock_irqsave(fence->lock, flags);
  580. __dma_fence_enable_signaling(fence);
  581. spin_unlock_irqrestore(fence->lock, flags);
  582. }
  583. EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
  584. /**
  585. * dma_fence_add_callback - add a callback to be called when the fence
  586. * is signaled
  587. * @fence: the fence to wait on
  588. * @cb: the callback to register
  589. * @func: the function to call
  590. *
  591. * Add a software callback to the fence. The caller should keep a reference to
  592. * the fence.
  593. *
  594. * @cb will be initialized by dma_fence_add_callback(), no initialization
  595. * by the caller is required. Any number of callbacks can be registered
  596. * to a fence, but a callback can only be registered to one fence at a time.
  597. *
  598. * If fence is already signaled, this function will return -ENOENT (and
  599. * *not* call the callback).
  600. *
  601. * Note that the callback can be called from an atomic context or irq context.
  602. *
  603. * Returns 0 in case of success, -ENOENT if the fence is already signaled
  604. * and -EINVAL in case of error.
  605. */
  606. int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
  607. dma_fence_func_t func)
  608. {
  609. unsigned long flags;
  610. int ret = 0;
  611. if (WARN_ON(!fence || !func))
  612. return -EINVAL;
  613. if (dma_fence_test_signaled_flag(fence)) {
  614. INIT_LIST_HEAD(&cb->node);
  615. return -ENOENT;
  616. }
  617. spin_lock_irqsave(fence->lock, flags);
  618. if (__dma_fence_enable_signaling(fence)) {
  619. cb->func = func;
  620. list_add_tail(&cb->node, &fence->cb_list);
  621. } else {
  622. INIT_LIST_HEAD(&cb->node);
  623. ret = -ENOENT;
  624. }
  625. spin_unlock_irqrestore(fence->lock, flags);
  626. return ret;
  627. }
  628. EXPORT_SYMBOL(dma_fence_add_callback);
  629. /**
  630. * dma_fence_get_status - returns the status upon completion
  631. * @fence: the dma_fence to query
  632. *
  633. * This wraps dma_fence_get_status_locked() to return the error status
  634. * condition on a signaled fence. See dma_fence_get_status_locked() for more
  635. * details.
  636. *
  637. * Returns 0 if the fence has not yet been signaled, 1 if the fence has
  638. * been signaled without an error condition, or a negative error code
  639. * if the fence has been completed in err.
  640. */
  641. int dma_fence_get_status(struct dma_fence *fence)
  642. {
  643. unsigned long flags;
  644. int status;
  645. spin_lock_irqsave(fence->lock, flags);
  646. status = dma_fence_get_status_locked(fence);
  647. spin_unlock_irqrestore(fence->lock, flags);
  648. return status;
  649. }
  650. EXPORT_SYMBOL(dma_fence_get_status);
  651. /**
  652. * dma_fence_remove_callback - remove a callback from the signaling list
  653. * @fence: the fence to wait on
  654. * @cb: the callback to remove
  655. *
  656. * Remove a previously queued callback from the fence. This function returns
  657. * true if the callback is successfully removed, or false if the fence has
  658. * already been signaled.
  659. *
  660. * *WARNING*:
  661. * Cancelling a callback should only be done if you really know what you're
  662. * doing, since deadlocks and race conditions could occur all too easily. For
  663. * this reason, it should only ever be done on hardware lockup recovery,
  664. * with a reference held to the fence.
  665. *
  666. * Behaviour is undefined if @cb has not been added to @fence using
  667. * dma_fence_add_callback() beforehand.
  668. */
  669. bool
  670. dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
  671. {
  672. unsigned long flags;
  673. bool ret;
  674. spin_lock_irqsave(fence->lock, flags);
  675. ret = !list_empty(&cb->node);
  676. if (ret)
  677. list_del_init(&cb->node);
  678. spin_unlock_irqrestore(fence->lock, flags);
  679. return ret;
  680. }
  681. EXPORT_SYMBOL(dma_fence_remove_callback);
  682. struct default_wait_cb {
  683. struct dma_fence_cb base;
  684. struct task_struct *task;
  685. };
  686. static void
  687. dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
  688. {
  689. struct default_wait_cb *wait =
  690. container_of(cb, struct default_wait_cb, base);
  691. wake_up_state(wait->task, TASK_NORMAL);
  692. }
  693. /**
  694. * dma_fence_default_wait - default sleep until the fence gets signaled
  695. * or until timeout elapses
  696. * @fence: the fence to wait on
  697. * @intr: if true, do an interruptible wait
  698. * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  699. *
  700. * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
  701. * remaining timeout in jiffies on success. If timeout is zero the value one is
  702. * returned if the fence is already signaled for consistency with other
  703. * functions taking a jiffies timeout.
  704. */
  705. signed long
  706. dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
  707. {
  708. struct default_wait_cb cb;
  709. unsigned long flags;
  710. signed long ret = timeout ? timeout : 1;
  711. spin_lock_irqsave(fence->lock, flags);
  712. if (dma_fence_test_signaled_flag(fence))
  713. goto out;
  714. if (intr && signal_pending(current)) {
  715. ret = -ERESTARTSYS;
  716. goto out;
  717. }
  718. if (!timeout) {
  719. ret = 0;
  720. goto out;
  721. }
  722. cb.base.func = dma_fence_default_wait_cb;
  723. cb.task = current;
  724. list_add(&cb.base.node, &fence->cb_list);
  725. while (!dma_fence_test_signaled_flag(fence) && ret > 0) {
  726. if (intr)
  727. __set_current_state(TASK_INTERRUPTIBLE);
  728. else
  729. __set_current_state(TASK_UNINTERRUPTIBLE);
  730. spin_unlock_irqrestore(fence->lock, flags);
  731. ret = schedule_timeout(ret);
  732. spin_lock_irqsave(fence->lock, flags);
  733. if (ret > 0 && intr && signal_pending(current))
  734. ret = -ERESTARTSYS;
  735. }
  736. if (!list_empty(&cb.base.node))
  737. list_del(&cb.base.node);
  738. __set_current_state(TASK_RUNNING);
  739. out:
  740. spin_unlock_irqrestore(fence->lock, flags);
  741. return ret;
  742. }
  743. EXPORT_SYMBOL(dma_fence_default_wait);
  744. static bool
  745. dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
  746. uint32_t *idx)
  747. {
  748. int i;
  749. for (i = 0; i < count; ++i) {
  750. struct dma_fence *fence = fences[i];
  751. if (dma_fence_test_signaled_flag(fence)) {
  752. if (idx)
  753. *idx = i;
  754. return true;
  755. }
  756. }
  757. return false;
  758. }
  759. /**
  760. * dma_fence_wait_any_timeout - sleep until any fence gets signaled
  761. * or until timeout elapses
  762. * @fences: array of fences to wait on
  763. * @count: number of fences to wait on
  764. * @intr: if true, do an interruptible wait
  765. * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  766. * @idx: used to store the first signaled fence index, meaningful only on
  767. * positive return
  768. *
  769. * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
  770. * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
  771. * on success.
  772. *
  773. * Synchronous waits for the first fence in the array to be signaled. The
  774. * caller needs to hold a reference to all fences in the array, otherwise a
  775. * fence might be freed before return, resulting in undefined behavior.
  776. *
  777. * See also dma_fence_wait() and dma_fence_wait_timeout().
  778. */
  779. signed long
  780. dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
  781. bool intr, signed long timeout, uint32_t *idx)
  782. {
  783. struct default_wait_cb *cb;
  784. signed long ret = timeout;
  785. unsigned i;
  786. if (WARN_ON(!fences || !count || timeout < 0))
  787. return -EINVAL;
  788. if (timeout == 0) {
  789. for (i = 0; i < count; ++i)
  790. if (dma_fence_is_signaled(fences[i])) {
  791. if (idx)
  792. *idx = i;
  793. return 1;
  794. }
  795. return 0;
  796. }
  797. cb = kzalloc_objs(struct default_wait_cb, count);
  798. if (cb == NULL) {
  799. ret = -ENOMEM;
  800. goto err_free_cb;
  801. }
  802. for (i = 0; i < count; ++i) {
  803. struct dma_fence *fence = fences[i];
  804. cb[i].task = current;
  805. if (dma_fence_add_callback(fence, &cb[i].base,
  806. dma_fence_default_wait_cb)) {
  807. /* This fence is already signaled */
  808. if (idx)
  809. *idx = i;
  810. goto fence_rm_cb;
  811. }
  812. }
  813. while (ret > 0) {
  814. if (intr)
  815. set_current_state(TASK_INTERRUPTIBLE);
  816. else
  817. set_current_state(TASK_UNINTERRUPTIBLE);
  818. if (dma_fence_test_signaled_any(fences, count, idx))
  819. break;
  820. ret = schedule_timeout(ret);
  821. if (ret > 0 && intr && signal_pending(current))
  822. ret = -ERESTARTSYS;
  823. }
  824. __set_current_state(TASK_RUNNING);
  825. fence_rm_cb:
  826. while (i-- > 0)
  827. dma_fence_remove_callback(fences[i], &cb[i].base);
  828. err_free_cb:
  829. kfree(cb);
  830. return ret;
  831. }
  832. EXPORT_SYMBOL(dma_fence_wait_any_timeout);
/**
 * DOC: deadline hints
 *
 * In an ideal world, it would be possible to pipeline a workload sufficiently
 * that a utilization based device frequency governor could arrive at a minimum
 * frequency that meets the requirements of the use-case, in order to minimize
 * power consumption.  But in the real world there are many workloads which
 * defy this ideal.  For example, but not limited to:
 *
 * * Workloads that ping-pong between device and CPU, with alternating periods
 *   of CPU waiting for device, and device waiting on CPU.  This can result in
 *   devfreq and cpufreq seeing idle time in their respective domains and, as
 *   a result, reducing frequency.
 *
 * * Workloads that interact with a periodic time based deadline, such as double
 *   buffered GPU rendering vs vblank sync'd page flipping.  In this scenario,
 *   missing a vblank deadline results in an *increase* in idle time on the GPU
 *   (since it has to wait an additional vblank period), sending a signal to
 *   the GPU's devfreq to reduce frequency, when in fact the opposite is what is
 *   needed.
 *
 * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline
 * (or indirectly via userspace facing ioctls like &sync_set_deadline).
 * The deadline hint provides a way for the waiting driver, or userspace, to
 * convey an appropriate sense of urgency to the signaling driver.
 *
 * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace
 * facing APIs).  The time could either be some point in the future (such as
 * the vblank based deadline for page-flipping, or the start of a compositor's
 * composition cycle), or the current time to indicate an immediate deadline
 * hint (i.e. forward progress cannot be made until this fence is signaled).
 *
 * Multiple deadlines may be set on a given fence, even in parallel.  See the
 * documentation for &dma_fence_ops.set_deadline.
 *
 * The deadline hint is just that, a hint.  The driver that created the fence
 * may react by increasing frequency, making different scheduling choices, etc.
 * Or doing nothing at all.
 */
  872. /**
  873. * dma_fence_set_deadline - set desired fence-wait deadline hint
  874. * @fence: the fence that is to be waited on
  875. * @deadline: the time by which the waiter hopes for the fence to be
  876. * signaled
  877. *
  878. * Give the fence signaler a hint about an upcoming deadline, such as
  879. * vblank, by which point the waiter would prefer the fence to be
  880. * signaled by. This is intended to give feedback to the fence signaler
  881. * to aid in power management decisions, such as boosting GPU frequency
  882. * if a periodic vblank deadline is approaching but the fence is not
  883. * yet signaled..
  884. */
  885. void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
  886. {
  887. if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
  888. fence->ops->set_deadline(fence, deadline);
  889. }
  890. EXPORT_SYMBOL(dma_fence_set_deadline);
  891. /**
  892. * dma_fence_describe - Dump fence description into seq_file
  893. * @fence: the fence to describe
  894. * @seq: the seq_file to put the textual description into
  895. *
  896. * Dump a textual description of the fence and it's state into the seq_file.
  897. */
  898. void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
  899. {
  900. const char __rcu *timeline = "";
  901. const char __rcu *driver = "";
  902. const char *signaled = "";
  903. rcu_read_lock();
  904. if (!dma_fence_is_signaled(fence)) {
  905. timeline = dma_fence_timeline_name(fence);
  906. driver = dma_fence_driver_name(fence);
  907. signaled = "un";
  908. }
  909. seq_printf(seq, "%llu:%llu %s %s %ssignalled\n",
  910. fence->context, fence->seqno, timeline, driver,
  911. signaled);
  912. rcu_read_unlock();
  913. }
  914. EXPORT_SYMBOL(dma_fence_describe);
  915. static void
  916. __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
  917. spinlock_t *lock, u64 context, u64 seqno, unsigned long flags)
  918. {
  919. BUG_ON(!lock);
  920. BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
  921. kref_init(&fence->refcount);
  922. fence->ops = ops;
  923. INIT_LIST_HEAD(&fence->cb_list);
  924. fence->lock = lock;
  925. fence->context = context;
  926. fence->seqno = seqno;
  927. fence->flags = flags;
  928. fence->error = 0;
  929. trace_dma_fence_init(fence);
  930. }
  931. /**
  932. * dma_fence_init - Initialize a custom fence.
  933. * @fence: the fence to initialize
  934. * @ops: the dma_fence_ops for operations on this fence
  935. * @lock: the irqsafe spinlock to use for locking this fence
  936. * @context: the execution context this fence is run on
  937. * @seqno: a linear increasing sequence number for this context
  938. *
  939. * Initializes an allocated fence, the caller doesn't have to keep its
  940. * refcount after committing with this fence, but it will need to hold a
  941. * refcount again if &dma_fence_ops.enable_signaling gets called.
  942. *
  943. * context and seqno are used for easy comparison between fences, allowing
  944. * to check which fence is later by simply using dma_fence_later().
  945. */
  946. void
  947. dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
  948. spinlock_t *lock, u64 context, u64 seqno)
  949. {
  950. __dma_fence_init(fence, ops, lock, context, seqno, 0UL);
  951. }
  952. EXPORT_SYMBOL(dma_fence_init);
  953. /**
  954. * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
  955. * @fence: the fence to initialize
  956. * @ops: the dma_fence_ops for operations on this fence
  957. * @lock: the irqsafe spinlock to use for locking this fence
  958. * @context: the execution context this fence is run on
  959. * @seqno: a linear increasing sequence number for this context
  960. *
  961. * Initializes an allocated fence, the caller doesn't have to keep its
  962. * refcount after committing with this fence, but it will need to hold a
  963. * refcount again if &dma_fence_ops.enable_signaling gets called.
  964. *
  965. * Context and seqno are used for easy comparison between fences, allowing
  966. * to check which fence is later by simply using dma_fence_later().
  967. */
  968. void
  969. dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
  970. spinlock_t *lock, u64 context, u64 seqno)
  971. {
  972. __dma_fence_init(fence, ops, lock, context, seqno,
  973. BIT(DMA_FENCE_FLAG_SEQNO64_BIT));
  974. }
  975. EXPORT_SYMBOL(dma_fence_init64);
  976. /**
  977. * dma_fence_driver_name - Access the driver name
  978. * @fence: the fence to query
  979. *
  980. * Returns a driver name backing the dma-fence implementation.
  981. *
  982. * IMPORTANT CONSIDERATION:
  983. * Dma-fence contract stipulates that access to driver provided data (data not
  984. * directly embedded into the object itself), such as the &dma_fence.lock and
  985. * memory potentially accessed by the &dma_fence.ops functions, is forbidden
  986. * after the fence has been signalled. Drivers are allowed to free that data,
  987. * and some do.
  988. *
  989. * To allow safe access drivers are mandated to guarantee a RCU grace period
  990. * between signalling the fence and freeing said data.
  991. *
  992. * As such access to the driver name is only valid inside a RCU locked section.
  993. * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded
  994. * by the &rcu_read_lock and &rcu_read_unlock pair.
  995. */
  996. const char __rcu *dma_fence_driver_name(struct dma_fence *fence)
  997. {
  998. RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
  999. "RCU protection is required for safe access to returned string");
  1000. if (!dma_fence_test_signaled_flag(fence))
  1001. return fence->ops->get_driver_name(fence);
  1002. else
  1003. return "detached-driver";
  1004. }
  1005. EXPORT_SYMBOL(dma_fence_driver_name);
  1006. /**
  1007. * dma_fence_timeline_name - Access the timeline name
  1008. * @fence: the fence to query
  1009. *
  1010. * Returns a timeline name provided by the dma-fence implementation.
  1011. *
  1012. * IMPORTANT CONSIDERATION:
  1013. * Dma-fence contract stipulates that access to driver provided data (data not
  1014. * directly embedded into the object itself), such as the &dma_fence.lock and
  1015. * memory potentially accessed by the &dma_fence.ops functions, is forbidden
  1016. * after the fence has been signalled. Drivers are allowed to free that data,
  1017. * and some do.
  1018. *
  1019. * To allow safe access drivers are mandated to guarantee a RCU grace period
  1020. * between signalling the fence and freeing said data.
  1021. *
  1022. * As such access to the driver name is only valid inside a RCU locked section.
  1023. * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded
  1024. * by the &rcu_read_lock and &rcu_read_unlock pair.
  1025. */
  1026. const char __rcu *dma_fence_timeline_name(struct dma_fence *fence)
  1027. {
  1028. RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
  1029. "RCU protection is required for safe access to returned string");
  1030. if (!dma_fence_test_signaled_flag(fence))
  1031. return fence->ops->get_timeline_name(fence);
  1032. else
  1033. return "signaled-timeline";
  1034. }
  1035. EXPORT_SYMBOL(dma_fence_timeline_name);