ethosu_device.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  /* SPDX-License-Identifier: GPL-2.0-only OR MIT */
  2. /* Copyright 2025 Arm, Ltd. */
  3. #ifndef __ETHOSU_DEVICE_H__
  4. #define __ETHOSU_DEVICE_H__
  5. #include <linux/bitfield.h>
  6. #include <linux/bits.h>
  7. #include <linux/types.h>
  8. #include <drm/drm_device.h>
  9. #include <drm/gpu_scheduler.h>
  10. #include <drm/ethosu_accel.h>
  11. struct clk;
  12. struct gen_pool;
  /*
   * NPU register offsets.
   *
   * The 0x0040-0x004c window is defined twice: as NPU_REG_AXILIMITn for U65
   * and as NPU_REG_MEM_ATTRn for U85.  Both sets of names resolve to the same
   * offsets.
   */
  #define NPU_REG_ID                  0x0000
  #define NPU_REG_STATUS              0x0004
  #define NPU_REG_CMD                 0x0008
  #define NPU_REG_RESET               0x000c
  #define NPU_REG_QBASE               0x0010
  #define NPU_REG_QBASE_HI            0x0014
  #define NPU_REG_QREAD               0x0018
  #define NPU_REG_QCONFIG             0x001c
  #define NPU_REG_QSIZE               0x0020
  #define NPU_REG_PROT                0x0024
  #define NPU_REG_CONFIG              0x0028
  #define NPU_REG_REGIONCFG           0x003c

  #define NPU_REG_AXILIMIT0           0x0040 /* U65 */
  #define NPU_REG_AXILIMIT1           0x0044 /* U65 */
  #define NPU_REG_AXILIMIT2           0x0048 /* U65 */
  #define NPU_REG_AXILIMIT3           0x004c /* U65 */

  #define NPU_REG_MEM_ATTR0           0x0040 /* U85 */
  #define NPU_REG_MEM_ATTR1           0x0044 /* U85 */
  #define NPU_REG_MEM_ATTR2           0x0048 /* U85 */
  #define NPU_REG_MEM_ATTR3           0x004c /* U85 */
  #define NPU_REG_AXI_SRAM            0x0050 /* U85 */
  #define NPU_REG_AXI_EXT             0x0054 /* U85 */

  /*
   * Per-region base pointer registers: region @x occupies 8 bytes starting at
   * 0x0080, with the _HI register 4 bytes above the low one.
   */
  #define NPU_REG_BASEP(x)            (0x0080 + (x) * 8)
  #define NPU_REG_BASEP_HI(x)         (0x0084 + (x) * 8)
  #define NPU_BASEP_REGION_MAX        8

  /*
   * Register field masks and bits.  The prefix of each name mirrors the
   * register it applies to (ID_* for NPU_REG_ID, STATUS_* for NPU_REG_STATUS,
   * and so on).
   */
  #define ID_ARCH_MAJOR_MASK          GENMASK(31, 28)
  #define ID_ARCH_MINOR_MASK          GENMASK(27, 20)
  #define ID_ARCH_PATCH_MASK          GENMASK(19, 16)
  #define ID_VER_MAJOR_MASK           GENMASK(11, 8)
  #define ID_VER_MINOR_MASK           GENMASK(7, 4)

  #define CONFIG_MACS_PER_CC_MASK     GENMASK(3, 0)
  #define CONFIG_CMD_STREAM_VER_MASK  GENMASK(7, 4)

  #define STATUS_STATE_RUNNING        BIT(0)
  #define STATUS_IRQ_RAISED           BIT(1)
  #define STATUS_BUS_STATUS           BIT(2)
  #define STATUS_RESET_STATUS         BIT(3)
  #define STATUS_CMD_PARSE_ERR        BIT(4)
  #define STATUS_CMD_END_REACHED      BIT(5)

  #define CMD_CLEAR_IRQ               BIT(1)
  #define CMD_TRANSITION_TO_RUN       BIT(0)

  #define RESET_PENDING_CSL           BIT(1)
  #define RESET_PENDING_CPL           BIT(0)

  #define PROT_ACTIVE_CSL             BIT(1)
  /*
   * enum ethosu_cmds - NPU command opcodes.
   *
   * The values fall into two numeric ranges: the NPU_OP_* operations and the
   * small NPU_SET_* parameter setters sit below 0x200, while the
   * BASE/STRIDE/LENGTH/SRC/DST setters start at 0x4000.
   *
   * NPU_SET_SCALE1_{BASE,LENGTH} and NPU_SET_WEIGHT2_{BASE,LENGTH} share the
   * values 0x4092 and 0x4093.  NOTE(review): presumably these are the U65 and
   * U85 names for the same opcodes - confirm against the command stream
   * specification.
   */
  enum ethosu_cmds {
      /* Operations */
      NPU_OP_CONV                 = 0x2,
      NPU_OP_DEPTHWISE            = 0x3,
      NPU_OP_POOL                 = 0x5,
      NPU_OP_ELEMENTWISE          = 0x6,
      NPU_OP_RESIZE               = 0x7, /* U85 only */
      NPU_OP_DMA_START            = 0x10,

      /* IFM parameters */
      NPU_SET_IFM_PAD_TOP         = 0x100,
      NPU_SET_IFM_PAD_LEFT        = 0x101,
      NPU_SET_IFM_PAD_RIGHT       = 0x102,
      NPU_SET_IFM_PAD_BOTTOM      = 0x103,
      NPU_SET_IFM_DEPTH_M1        = 0x104,
      NPU_SET_IFM_PRECISION       = 0x105,
      NPU_SET_IFM_BROADCAST       = 0x108,
      NPU_SET_IFM_WIDTH0_M1       = 0x10a,
      NPU_SET_IFM_HEIGHT0_M1      = 0x10b,
      NPU_SET_IFM_HEIGHT1_M1      = 0x10c,
      NPU_SET_IFM_REGION          = 0x10f,

      /* OFM parameters */
      NPU_SET_OFM_WIDTH_M1        = 0x111,
      NPU_SET_OFM_HEIGHT_M1       = 0x112,
      NPU_SET_OFM_DEPTH_M1        = 0x113,
      NPU_SET_OFM_PRECISION       = 0x114,
      NPU_SET_OFM_WIDTH0_M1       = 0x11a,
      NPU_SET_OFM_HEIGHT0_M1      = 0x11b,
      NPU_SET_OFM_HEIGHT1_M1      = 0x11c,
      NPU_SET_OFM_REGION          = 0x11f,

      /* Kernel, weight and scale parameters */
      NPU_SET_KERNEL_WIDTH_M1     = 0x120,
      NPU_SET_KERNEL_HEIGHT_M1    = 0x121,
      NPU_SET_KERNEL_STRIDE       = 0x122,
      NPU_SET_WEIGHT_REGION       = 0x128,
      NPU_SET_SCALE_REGION        = 0x129,

      /* DMA channel 0 parameters */
      NPU_SET_DMA0_SRC_REGION     = 0x130,
      NPU_SET_DMA0_DST_REGION     = 0x131,
      NPU_SET_DMA0_SIZE0          = 0x132,
      NPU_SET_DMA0_SIZE1          = 0x133,

      /* IFM2 parameters */
      NPU_SET_IFM2_BROADCAST      = 0x180,
      NPU_SET_IFM2_PRECISION      = 0x185,
      NPU_SET_IFM2_WIDTH0_M1      = 0x18a,
      NPU_SET_IFM2_HEIGHT0_M1     = 0x18b,
      NPU_SET_IFM2_HEIGHT1_M1     = 0x18c,
      NPU_SET_IFM2_REGION         = 0x18f,

      /* IFM bases and strides */
      NPU_SET_IFM_BASE0           = 0x4000,
      NPU_SET_IFM_BASE1           = 0x4001,
      NPU_SET_IFM_BASE2           = 0x4002,
      NPU_SET_IFM_BASE3           = 0x4003,
      NPU_SET_IFM_STRIDE_X        = 0x4004,
      NPU_SET_IFM_STRIDE_Y        = 0x4005,
      NPU_SET_IFM_STRIDE_C        = 0x4006,

      /* OFM bases and strides */
      NPU_SET_OFM_BASE0           = 0x4010,
      NPU_SET_OFM_BASE1           = 0x4011,
      NPU_SET_OFM_BASE2           = 0x4012,
      NPU_SET_OFM_BASE3           = 0x4013,
      NPU_SET_OFM_STRIDE_X        = 0x4014,
      NPU_SET_OFM_STRIDE_Y        = 0x4015,
      NPU_SET_OFM_STRIDE_C        = 0x4016,

      /* Weight and scale streams */
      NPU_SET_WEIGHT_BASE         = 0x4020,
      NPU_SET_WEIGHT_LENGTH       = 0x4021,
      NPU_SET_SCALE_BASE          = 0x4022,
      NPU_SET_SCALE_LENGTH        = 0x4023,

      /* DMA channel 0 addresses, length and strides */
      NPU_SET_DMA0_SRC            = 0x4030,
      NPU_SET_DMA0_DST            = 0x4031,
      NPU_SET_DMA0_LEN            = 0x4032,
      NPU_SET_DMA0_SRC_STRIDE0    = 0x4033,
      NPU_SET_DMA0_SRC_STRIDE1    = 0x4034,
      NPU_SET_DMA0_DST_STRIDE0    = 0x4035,
      NPU_SET_DMA0_DST_STRIDE1    = 0x4036,

      /* IFM2 bases and strides */
      NPU_SET_IFM2_BASE0          = 0x4080,
      NPU_SET_IFM2_BASE1          = 0x4081,
      NPU_SET_IFM2_BASE2          = 0x4082,
      NPU_SET_IFM2_BASE3          = 0x4083,
      NPU_SET_IFM2_STRIDE_X       = 0x4084,
      NPU_SET_IFM2_STRIDE_Y       = 0x4085,
      NPU_SET_IFM2_STRIDE_C       = 0x4086,

      /* Additional weight/scale streams; 0x4092 and 0x4093 have two names */
      NPU_SET_WEIGHT1_BASE        = 0x4090,
      NPU_SET_WEIGHT1_LENGTH      = 0x4091,
      NPU_SET_SCALE1_BASE         = 0x4092,
      NPU_SET_WEIGHT2_BASE        = 0x4092,
      NPU_SET_SCALE1_LENGTH       = 0x4093,
      NPU_SET_WEIGHT2_LENGTH      = 0x4093,
      NPU_SET_WEIGHT3_BASE        = 0x4094,
      NPU_SET_WEIGHT3_LENGTH      = 0x4095,
  };
  /*
   * Region index used for SRAM.  The value is chosen to match the one used by
   * the Vela compiler, so it must not be changed independently of it.
   */
  #define ETHOSU_SRAM_REGION 2
  139. /**
  140. * struct ethosu_device - Ethosu device
  141. */
  142. struct ethosu_device {
  143. /** @base: Base drm_device. */
  144. struct drm_device base;
  145. /** @iomem: CPU mapping of the registers. */
  146. void __iomem *regs;
  147. void __iomem *sram;
  148. struct gen_pool *srampool;
  149. dma_addr_t sramphys;
  150. struct clk_bulk_data *clks;
  151. int num_clks;
  152. int irq;
  153. struct drm_ethosu_npu_info npu_info;
  154. struct ethosu_job *in_flight_job;
  155. /* For in_flight_job and ethosu_job_hw_submit() */
  156. struct mutex job_lock;
  157. /* For dma_fence */
  158. spinlock_t fence_lock;
  159. struct drm_gpu_scheduler sched;
  160. /* For ethosu_job_do_push() */
  161. struct mutex sched_lock;
  162. u64 fence_context;
  163. u64 emit_seqno;
  164. };
  /**
   * to_ethosu_device() - Get the ethosu_device that embeds a drm_device
   * @drm_dev: Pointer to the ``base`` member of a struct ethosu_device.
   *
   * container_of() already evaluates to a ``struct ethosu_device *``, so no
   * further cast is applied to its result.
   */
  #define to_ethosu_device(drm_dev) \
      container_of(drm_dev, struct ethosu_device, base)
  167. static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev)
  168. {
  169. return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1;
  170. }
  171. #endif