npu4_regs.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  // SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
   */
  5. #include <drm/amdxdna_accel.h>
  6. #include <drm/drm_device.h>
  7. #include <drm/gpu_scheduler.h>
  8. #include <linux/bits.h>
  9. #include <linux/sizes.h>
  10. #include "aie2_pci.h"
  11. #include "amdxdna_mailbox.h"
  12. #include "amdxdna_pci_drv.h"
  /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
  #define MPNPU_PWAITMODE             0x301003C
  #define MPNPU_PUB_SEC_INTR          0x3010060
  #define MPNPU_PUB_PWRMGMT_INTR      0x3010064
  #define MPNPU_PUB_SCRATCH0          0x301006C
  #define MPNPU_PUB_SCRATCH1          0x3010070
  #define MPNPU_PUB_SCRATCH2          0x3010074
  #define MPNPU_PUB_SCRATCH3          0x3010078
  #define MPNPU_PUB_SCRATCH4          0x301007C
  #define MPNPU_PUB_SCRATCH5          0x3010080
  #define MPNPU_PUB_SCRATCH6          0x3010084
  #define MPNPU_PUB_SCRATCH7          0x3010088
  #define MPNPU_PUB_SCRATCH8          0x301008C
  #define MPNPU_PUB_SCRATCH9          0x3010090
  #define MPNPU_PUB_SCRATCH10         0x3010094
  #define MPNPU_PUB_SCRATCH11         0x3010098
  #define MPNPU_PUB_SCRATCH12         0x301009C
  #define MPNPU_PUB_SCRATCH13         0x30100A0
  #define MPNPU_PUB_SCRATCH14         0x30100A4
  #define MPNPU_PUB_SCRATCH15         0x30100A8

  /* MP0 message registers; this file uses them for the PSP command/status and interrupt registers */
  #define MP0_C2PMSG_73               0x3810A24
  #define MP0_C2PMSG_123              0x3810AEC

  /* MP1 message registers; this file uses them for the SMU command, argument/output and response registers */
  #define MP1_C2PMSG_0                0x3B10900
  #define MP1_C2PMSG_60               0x3B109F0
  #define MP1_C2PMSG_61               0x3B109F4

  /* SRAM mailbox addresses; MAILBOX_0 has the same value as MMNPU_APERTURE1_BASE */
  #define MPNPU_SRAM_X2I_MAILBOX_0    0x3600000
  #define MPNPU_SRAM_X2I_MAILBOX_15   0x361E000
  #define MPNPU_SRAM_X2I_MAILBOX_31   0x363E000
  #define MPNPU_SRAM_I2X_MAILBOX_31   0x363F000

  /* Aperture base addresses; each NPU4_*_BAR_BASE macro aliases one of them */
  #define MMNPU_APERTURE0_BASE        0x3000000
  #define MMNPU_APERTURE1_BASE        0x3600000
  #define MMNPU_APERTURE3_BASE        0x3810000
  #define MMNPU_APERTURE4_BASE        0x3B10000

  /* PCIe BAR Index for NPU4 */
  #define NPU4_REG_BAR_INDEX          0
  #define NPU4_MBOX_BAR_INDEX         0
  #define NPU4_PSP_BAR_INDEX          4
  #define NPU4_SMU_BAR_INDEX          5
  #define NPU4_SRAM_BAR_INDEX         2

  /* Associated BARs and Apertures */
  #define NPU4_REG_BAR_BASE           MMNPU_APERTURE0_BASE
  #define NPU4_MBOX_BAR_BASE          MMNPU_APERTURE0_BASE
  #define NPU4_PSP_BAR_BASE           MMNPU_APERTURE3_BASE
  #define NPU4_SMU_BAR_BASE           MMNPU_APERTURE4_BASE
  #define NPU4_SRAM_BAR_BASE          MMNPU_APERTURE1_BASE
  58. const struct rt_config npu4_default_rt_cfg[] = {
  59. { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
  60. { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
  61. { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
  62. { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
  63. { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
  64. { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
  65. { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
  66. { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
  67. { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
  68. { 0 },
  69. };
  70. const struct dpm_clk_freq npu4_dpm_clk_table[] = {
  71. {396, 792},
  72. {600, 1056},
  73. {792, 1152},
  74. {975, 1267},
  75. {975, 1267},
  76. {1056, 1408},
  77. {1152, 1584},
  78. {1267, 1800},
  79. { 0 }
  80. };
  81. const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
  82. { .major = 6, .min_minor = 12 },
  83. { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
  84. { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 },
  85. { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 },
  86. { .features = GENMASK_ULL(AIE2_TEMPORAL_ONLY, AIE2_NPU_COMMAND), .major = 7 },
  87. { 0 }
  88. };
  89. static const struct amdxdna_dev_priv npu4_dev_priv = {
  90. .fw_path = "amdnpu/17f0_10/",
  91. .rt_config = npu4_default_rt_cfg,
  92. .dpm_clk_tbl = npu4_dpm_clk_table,
  93. .fw_feature_tbl = npu4_fw_feature_table,
  94. .col_align = COL_ALIGN_NATURE,
  95. .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
  96. .mbox_size = 0, /* Use BAR size */
  97. .sram_dev_addr = NPU4_SRAM_BAR_BASE,
  98. .hwctx_limit = 16,
  99. .sram_offs = {
  100. DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
  101. DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
  102. },
  103. .psp_regs_off = {
  104. DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123),
  105. DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
  106. DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4),
  107. DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9),
  108. DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
  109. DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
  110. DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
  111. DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE),
  112. },
  113. .smu_regs_off = {
  114. DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
  115. DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60),
  116. DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
  117. DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
  118. DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
  119. },
  120. .hw_ops = {
  121. .set_dpm = npu4_set_dpm,
  122. },
  123. };
  124. const struct amdxdna_dev_info dev_npu4_info = {
  125. .reg_bar = NPU4_REG_BAR_INDEX,
  126. .mbox_bar = NPU4_MBOX_BAR_INDEX,
  127. .sram_bar = NPU4_SRAM_BAR_INDEX,
  128. .psp_bar = NPU4_PSP_BAR_INDEX,
  129. .smu_bar = NPU4_SMU_BAR_INDEX,
  130. .first_col = 0,
  131. .dev_mem_buf_shift = 15, /* 32 KiB aligned */
  132. .dev_mem_base = AIE2_DEVM_BASE,
  133. .dev_mem_size = AIE2_DEVM_SIZE,
  134. .vbnv = "RyzenAI-npu4",
  135. .device_type = AMDXDNA_DEV_TYPE_KMQ,
  136. .dev_priv = &npu4_dev_priv,
  137. .ops = &aie2_ops, /* NPU4 can share NPU1's callback */
  138. };