panthor_fw.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. /* SPDX-License-Identifier: GPL-2.0 or MIT */
  2. /* Copyright 2023 Collabora ltd. */
  3. #ifndef __PANTHOR_MCU_H__
  4. #define __PANTHOR_MCU_H__
  5. #include <linux/types.h>
  6. struct panthor_device;
  7. struct panthor_kernel_bo;
  8. #define MAX_CSGS 31
  9. #define MAX_CS_PER_CSG 32
  10. struct panthor_fw_ringbuf_input_iface {
  11. u64 insert;
  12. u64 extract;
  13. };
  14. struct panthor_fw_ringbuf_output_iface {
  15. u64 extract;
  16. u32 active;
  17. };
  18. struct panthor_fw_cs_control_iface {
  19. #define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1)
  20. #define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8)
  21. #define CS_FEATURES_COMPUTE BIT(16)
  22. #define CS_FEATURES_FRAGMENT BIT(17)
  23. #define CS_FEATURES_TILER BIT(18)
  24. u32 features;
  25. u32 input_va;
  26. u32 output_va;
  27. };
  28. struct panthor_fw_cs_input_iface {
  29. #define CS_STATE_MASK GENMASK(2, 0)
  30. #define CS_STATE_STOP 0
  31. #define CS_STATE_START 1
  32. #define CS_EXTRACT_EVENT BIT(4)
  33. #define CS_IDLE_SYNC_WAIT BIT(8)
  34. #define CS_IDLE_PROTM_PENDING BIT(9)
  35. #define CS_IDLE_EMPTY BIT(10)
  36. #define CS_IDLE_RESOURCE_REQ BIT(11)
  37. #define CS_TILER_OOM BIT(26)
  38. #define CS_PROTM_PENDING BIT(27)
  39. #define CS_FATAL BIT(30)
  40. #define CS_FAULT BIT(31)
  41. #define CS_REQ_MASK (CS_STATE_MASK | \
  42. CS_EXTRACT_EVENT | \
  43. CS_IDLE_SYNC_WAIT | \
  44. CS_IDLE_PROTM_PENDING | \
  45. CS_IDLE_EMPTY | \
  46. CS_IDLE_RESOURCE_REQ)
  47. #define CS_EVT_MASK (CS_TILER_OOM | \
  48. CS_PROTM_PENDING | \
  49. CS_FATAL | \
  50. CS_FAULT)
  51. u32 req;
  52. #define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0))
  53. #define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8))
  54. u32 config;
  55. u32 reserved1;
  56. u32 ack_irq_mask;
  57. u64 ringbuf_base;
  58. u32 ringbuf_size;
  59. u32 reserved2;
  60. u64 heap_start;
  61. u64 heap_end;
  62. u64 ringbuf_input;
  63. u64 ringbuf_output;
  64. u32 instr_config;
  65. u32 instrbuf_size;
  66. u64 instrbuf_base;
  67. u64 instrbuf_offset_ptr;
  68. };
  69. struct panthor_fw_cs_output_iface {
  70. u32 ack;
  71. u32 reserved1[15];
  72. u64 status_cmd_ptr;
  73. #define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0)
  74. #define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16)
  75. #define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16)
  76. #define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16)
  77. #define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24)
  78. #define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24)
  79. #define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24)
  80. #define CS_STATUS_WAIT_PROGRESS BIT(28)
  81. #define CS_STATUS_WAIT_PROTM BIT(29)
  82. #define CS_STATUS_WAIT_SYNC_64B BIT(30)
  83. #define CS_STATUS_WAIT_SYNC BIT(31)
  84. u32 status_wait;
  85. u32 status_req_resource;
  86. u64 status_wait_sync_ptr;
  87. u32 status_wait_sync_value;
  88. u32 status_scoreboards;
  89. #define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0
  90. #define CS_STATUS_BLOCKED_REASON_SB_WAIT 1
  91. #define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2
  92. #define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3
  93. #define CS_STATUS_BLOCKED_REASON_DEFERRED 4
  94. #define CS_STATUS_BLOCKED_REASON_RESOURCE 5
  95. #define CS_STATUS_BLOCKED_REASON_FLUSH 6
  96. #define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0)
  97. u32 status_blocked_reason;
  98. u32 status_wait_sync_value_hi;
  99. u32 reserved2[6];
  100. #define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0))
  101. #define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0))
  102. u32 fault;
  103. u32 fatal;
  104. u64 fault_info;
  105. u64 fatal_info;
  106. u32 reserved3[10];
  107. u32 heap_vt_start;
  108. u32 heap_vt_end;
  109. u32 reserved4;
  110. u32 heap_frag_end;
  111. u64 heap_address;
  112. };
  113. struct panthor_fw_csg_control_iface {
  114. u32 features;
  115. u32 input_va;
  116. u32 output_va;
  117. u32 suspend_size;
  118. u32 protm_suspend_size;
  119. u32 stream_num;
  120. u32 stream_stride;
  121. };
  122. struct panthor_fw_csg_input_iface {
  123. #define CSG_STATE_MASK GENMASK(2, 0)
  124. #define CSG_STATE_TERMINATE 0
  125. #define CSG_STATE_START 1
  126. #define CSG_STATE_SUSPEND 2
  127. #define CSG_STATE_RESUME 3
  128. #define CSG_ENDPOINT_CONFIG BIT(4)
  129. #define CSG_STATUS_UPDATE BIT(5)
  130. #define CSG_SYNC_UPDATE BIT(28)
  131. #define CSG_IDLE BIT(29)
  132. #define CSG_DOORBELL BIT(30)
  133. #define CSG_PROGRESS_TIMER_EVENT BIT(31)
  134. #define CSG_REQ_MASK (CSG_STATE_MASK | \
  135. CSG_ENDPOINT_CONFIG | \
  136. CSG_STATUS_UPDATE)
  137. #define CSG_EVT_MASK (CSG_SYNC_UPDATE | \
  138. CSG_IDLE | \
  139. CSG_PROGRESS_TIMER_EVENT)
  140. u32 req;
  141. u32 ack_irq_mask;
  142. u32 doorbell_req;
  143. u32 cs_irq_ack;
  144. u32 reserved1[4];
  145. u64 allow_compute;
  146. u64 allow_fragment;
  147. u32 allow_other;
  148. #define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0))
  149. #define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8))
  150. #define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16))
  151. #define CSG_EP_REQ_EXCL_COMPUTE BIT(20)
  152. #define CSG_EP_REQ_EXCL_FRAGMENT BIT(21)
  153. #define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28)
  154. #define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & CSG_EP_REQ_PRIORITY_MASK)
  155. #define CSG_EP_REQ_PRIORITY_GET(x) (((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28)
  156. u32 endpoint_req;
  157. u64 endpoint_req2;
  158. u64 suspend_buf;
  159. u64 protm_suspend_buf;
  160. u32 config;
  161. u32 iter_trace_config;
  162. };
  163. struct panthor_fw_csg_output_iface {
  164. u32 ack;
  165. u32 reserved1;
  166. u32 doorbell_ack;
  167. u32 cs_irq_req;
  168. u32 status_endpoint_current;
  169. u32 status_endpoint_req;
  170. #define CSG_STATUS_STATE_IS_IDLE BIT(0)
  171. u32 status_state;
  172. u32 resource_dep;
  173. };
  174. struct panthor_fw_global_control_iface {
  175. u32 version;
  176. u32 features;
  177. u32 input_va;
  178. u32 output_va;
  179. u32 group_num;
  180. u32 group_stride;
  181. u32 perfcnt_size;
  182. u32 instr_features;
  183. };
  184. struct panthor_fw_global_input_iface {
  185. #define GLB_HALT BIT(0)
  186. #define GLB_CFG_PROGRESS_TIMER BIT(1)
  187. #define GLB_CFG_ALLOC_EN BIT(2)
  188. #define GLB_CFG_POWEROFF_TIMER BIT(3)
  189. #define GLB_PROTM_ENTER BIT(4)
  190. #define GLB_PERFCNT_EN BIT(5)
  191. #define GLB_PERFCNT_SAMPLE BIT(6)
  192. #define GLB_COUNTER_EN BIT(7)
  193. #define GLB_PING BIT(8)
  194. #define GLB_FWCFG_UPDATE BIT(9)
  195. #define GLB_IDLE_EN BIT(10)
  196. #define GLB_SLEEP BIT(12)
  197. #define GLB_STATE_MASK GENMASK(14, 12)
  198. #define GLB_STATE_ACTIVE 0
  199. #define GLB_STATE_HALT 1
  200. #define GLB_STATE_SLEEP 2
  201. #define GLB_STATE_SUSPEND 3
  202. #define GLB_STATE(x) (((x) << 12) & GLB_STATE_MASK)
  203. #define GLB_STATE_GET(x) (((x) & GLB_STATE_MASK) >> 12)
  204. #define GLB_INACTIVE_COMPUTE BIT(20)
  205. #define GLB_INACTIVE_FRAGMENT BIT(21)
  206. #define GLB_INACTIVE_TILER BIT(22)
  207. #define GLB_PROTM_EXIT BIT(23)
  208. #define GLB_PERFCNT_THRESHOLD BIT(24)
  209. #define GLB_PERFCNT_OVERFLOW BIT(25)
  210. #define GLB_IDLE BIT(26)
  211. #define GLB_DBG_CSF BIT(30)
  212. #define GLB_DBG_HOST BIT(31)
  213. #define GLB_REQ_MASK GENMASK(10, 0)
  214. #define GLB_EVT_MASK GENMASK(26, 20)
  215. u32 req;
  216. u32 ack_irq_mask;
  217. u32 doorbell_req;
  218. u32 reserved1;
  219. u32 progress_timer;
  220. #define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0))
  221. #define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31)
  222. u32 poweroff_timer;
  223. u64 core_en_mask;
  224. u32 reserved2;
  225. u32 perfcnt_as;
  226. u64 perfcnt_base;
  227. u32 perfcnt_extract;
  228. u32 reserved3[3];
  229. u32 perfcnt_config;
  230. u32 perfcnt_csg_select;
  231. u32 perfcnt_fw_enable;
  232. u32 perfcnt_csg_enable;
  233. u32 perfcnt_csf_enable;
  234. u32 perfcnt_shader_enable;
  235. u32 perfcnt_tiler_enable;
  236. u32 perfcnt_mmu_l2_enable;
  237. u32 reserved4[8];
  238. u32 idle_timer;
  239. };
  240. enum panthor_fw_halt_status {
  241. PANTHOR_FW_HALT_OK = 0,
  242. PANTHOR_FW_HALT_ON_PANIC = 0x4e,
  243. PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
  244. };
  245. struct panthor_fw_global_output_iface {
  246. u32 ack;
  247. u32 reserved1;
  248. u32 doorbell_ack;
  249. u32 reserved2;
  250. u32 halt_status;
  251. u32 perfcnt_status;
  252. u32 perfcnt_insert;
  253. };
  254. /**
  255. * struct panthor_fw_cs_iface - Firmware command stream slot interface
  256. */
  257. struct panthor_fw_cs_iface {
  258. /**
  259. * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
  260. * field.
  261. *
  262. * Needed so we can update the req field concurrently from the interrupt
  263. * handler and the scheduler logic.
  264. *
  265. * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
  266. * interface sections are mapped uncached/write-combined right now, and
  267. * using cmpxchg() on such mappings leads to SError faults. Revisit when
  268. * we have 'SHARED' GPU mappings hooked up.
  269. */
  270. spinlock_t lock;
  271. /**
  272. * @control: Command stream slot control interface.
  273. *
  274. * Used to expose command stream slot properties.
  275. *
  276. * This interface is read-only.
  277. */
  278. struct panthor_fw_cs_control_iface *control;
  279. /**
  280. * @input: Command stream slot input interface.
  281. *
  282. * Used for host updates/events.
  283. */
  284. struct panthor_fw_cs_input_iface *input;
  285. /**
  286. * @output: Command stream slot output interface.
  287. *
  288. * Used for FW updates/events.
  289. *
  290. * This interface is read-only.
  291. */
  292. const struct panthor_fw_cs_output_iface *output;
  293. };
  294. /**
  295. * struct panthor_fw_csg_iface - Firmware command stream group slot interface
  296. */
  297. struct panthor_fw_csg_iface {
  298. /**
  299. * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
  300. * field.
  301. *
  302. * Needed so we can update the req field concurrently from the interrupt
  303. * handler and the scheduler logic.
  304. *
  305. * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
  306. * interface sections are mapped uncached/write-combined right now, and
  307. * using cmpxchg() on such mappings leads to SError faults. Revisit when
  308. * we have 'SHARED' GPU mappings hooked up.
  309. */
  310. spinlock_t lock;
  311. /**
  312. * @control: Command stream group slot control interface.
  313. *
  314. * Used to expose command stream group slot properties.
  315. *
  316. * This interface is read-only.
  317. */
  318. const struct panthor_fw_csg_control_iface *control;
  319. /**
  320. * @input: Command stream slot input interface.
  321. *
  322. * Used for host updates/events.
  323. */
  324. struct panthor_fw_csg_input_iface *input;
  325. /**
  326. * @output: Command stream group slot output interface.
  327. *
  328. * Used for FW updates/events.
  329. *
  330. * This interface is read-only.
  331. */
  332. const struct panthor_fw_csg_output_iface *output;
  333. };
  334. /**
  335. * struct panthor_fw_global_iface - Firmware global interface
  336. */
  337. struct panthor_fw_global_iface {
  338. /**
  339. * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
  340. * field.
  341. *
  342. * Needed so we can update the req field concurrently from the interrupt
  343. * handler and the scheduler/FW management logic.
  344. *
  345. * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
  346. * interface sections are mapped uncached/write-combined right now, and
  347. * using cmpxchg() on such mappings leads to SError faults. Revisit when
  348. * we have 'SHARED' GPU mappings hooked up.
  349. */
  350. spinlock_t lock;
  351. /**
  352. * @control: Command stream group slot control interface.
  353. *
  354. * Used to expose global FW properties.
  355. *
  356. * This interface is read-only.
  357. */
  358. const struct panthor_fw_global_control_iface *control;
  359. /**
  360. * @input: Global input interface.
  361. *
  362. * Used for host updates/events.
  363. */
  364. struct panthor_fw_global_input_iface *input;
  365. /**
  366. * @output: Global output interface.
  367. *
  368. * Used for FW updates/events.
  369. *
  370. * This interface is read-only.
  371. */
  372. const struct panthor_fw_global_output_iface *output;
  373. };
  374. /**
  375. * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
  376. * @__iface: The interface to operate on.
  377. * @__in_reg: Name of the register to update in the input section of the interface.
  378. * @__out_reg: Name of the register to take as a reference in the output section of the
  379. * interface.
  380. * @__mask: Mask to apply to the update.
  381. *
  382. * The Host -> FW event/message passing was designed to be lockless, with each side of
  383. * the channel having its writeable section. Events are signaled as a difference between
  384. * the host and FW side in the req/ack registers (when a bit differs, there's an event
  385. * pending, when they are the same, nothing needs attention).
  386. *
  387. * This helper allows one to update the req register based on the current value of the
  388. * ack register managed by the FW. Toggling a specific bit will flag an event. In order
  389. * for events to be re-evaluated, the interface doorbell needs to be rung.
  390. *
  391. * Concurrent accesses to the same req register is covered.
  392. *
  393. * Anything requiring atomic updates to multiple registers requires a dedicated lock.
  394. */
  395. #define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
  396. do { \
  397. u32 __cur_val, __new_val, __out_val; \
  398. spin_lock(&(__iface)->lock); \
  399. __cur_val = READ_ONCE((__iface)->input->__in_reg); \
  400. __out_val = READ_ONCE((__iface)->output->__out_reg); \
  401. __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
  402. WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
  403. spin_unlock(&(__iface)->lock); \
  404. } while (0)
  405. /**
  406. * panthor_fw_update_reqs() - Update bits to reflect a configuration change
  407. * @__iface: The interface to operate on.
  408. * @__in_reg: Name of the register to update in the input section of the interface.
  409. * @__val: Value to set.
  410. * @__mask: Mask to apply to the update.
  411. *
  412. * Some configuration get passed through req registers that are also used to
  413. * send events to the FW. Those req registers being updated from the interrupt
  414. * handler, they require special helpers to update the configuration part as well.
  415. *
  416. * Concurrent accesses to the same req register is covered.
  417. *
  418. * Anything requiring atomic updates to multiple registers requires a dedicated lock.
  419. */
  420. #define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
  421. do { \
  422. u32 __cur_val, __new_val; \
  423. spin_lock(&(__iface)->lock); \
  424. __cur_val = READ_ONCE((__iface)->input->__in_reg); \
  425. __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
  426. WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
  427. spin_unlock(&(__iface)->lock); \
  428. } while (0)
  429. #define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \
  430. do { \
  431. u64 __cur_val, __new_val; \
  432. spin_lock(&(__iface)->lock); \
  433. __cur_val = READ_ONCE((__iface)->input->__in_reg); \
  434. __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
  435. WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
  436. spin_unlock(&(__iface)->lock); \
  437. } while (0)
  438. struct panthor_fw_global_iface *
  439. panthor_fw_get_glb_iface(struct panthor_device *ptdev);
  440. struct panthor_fw_csg_iface *
  441. panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
  442. struct panthor_fw_cs_iface *
  443. panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
  444. u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
  445. struct panthor_fw_csg_iface *csg_iface);
  446. void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
  447. struct panthor_fw_csg_iface *csg_iface, u64 value);
  448. void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
  449. struct panthor_fw_csg_iface *csg_iface, u64 value,
  450. u64 mask);
  451. int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
  452. u32 *acked, u32 timeout_ms);
  453. int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
  454. u32 timeout_ms);
  455. void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
  456. struct panthor_kernel_bo *
  457. panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
  458. struct panthor_fw_ringbuf_input_iface **input,
  459. const struct panthor_fw_ringbuf_output_iface **output,
  460. u32 *input_fw_va, u32 *output_fw_va);
  461. struct panthor_kernel_bo *
  462. panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
  463. struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
  464. void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
  465. int panthor_fw_post_reset(struct panthor_device *ptdev);
  466. static inline void panthor_fw_suspend(struct panthor_device *ptdev)
  467. {
  468. panthor_fw_pre_reset(ptdev, false);
  469. }
  470. static inline int panthor_fw_resume(struct panthor_device *ptdev)
  471. {
  472. return panthor_fw_post_reset(ptdev);
  473. }
  474. int panthor_fw_init(struct panthor_device *ptdev);
  475. void panthor_fw_unplug(struct panthor_device *ptdev);
  476. #endif