amd_metrics.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
  3. import argparse
  4. import math
  5. import os
  6. from typing import Optional
  7. from common_metrics import Cycles
  8. from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric,
  9. JsonEncodeMetricGroupDescriptions, Literal, LoadEvents,
  10. Metric, MetricGroup, Select)
  # Parsed command line arguments; assigned by main().
  _args: Optional[argparse.Namespace] = None
  # Zen generation number; main() derives it from the model name.
  _zen_model: int = 1

  # Event handles shared by the metric builders in this file.
  interval_sec = Event("duration_time")
  ins = Event("instructions")
  cycles = Event("cycles")

  # Cycle count scaled for SMT: cycles / 2 is selected on "#smt_on", plain
  # cycles otherwise (assumes Select(value_if_true, condition, value_if_false)
  # - confirm against metric.py).
  smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles)
  def AmdBr() -> MetricGroup:
      """Build the "lpm_br" metric group describing retired branches.

      The group nests one sub-group per branch category: total, taken,
      conditional, fused and far.  Every sub-group reports a per-second rate
      and the number of instructions per branch; a mispredict percentage is
      added where a mispredict event is used.

      Returns:
        The "lpm_br" MetricGroup.
      """

      def Total() -> MetricGroup:
          """All retired branches, their mispredicts and resync branches."""
          br = Event("ex_ret_brn")
          br_m_all = Event("ex_ret_brn_misp")
          # Several candidate names are passed; presumably Event() picks the
          # one available for the loaded model - confirm in metric.py.
          br_clr = Event("ex_ret_brn_cond_misp",
                         "ex_ret_msprd_brnch_instr_dir_msmtch",
                         "ex_ret_brn_resync")

          br_r = d_ratio(br, interval_sec)
          ins_r = d_ratio(ins, br)
          misp_r = d_ratio(br_m_all, br)
          clr_r = d_ratio(br_clr, interval_sec)

          return MetricGroup("lpm_br_total", [
              Metric("lpm_br_total_retired",
                     "The number of branch instructions retired per second.",
                     br_r, "insn/s"),
              Metric(
                  "lpm_br_total_mispred",
                  "The number of branch instructions retired, of any type, "
                  "that were not correctly predicted as a percentage of all "
                  "branch instructions.",
                  misp_r, "100%"),
              Metric("lpm_br_total_insn_between_branches",
                     "The number of instructions divided by the number of "
                     "branches.",
                     ins_r, "insn"),
              Metric("lpm_br_total_insn_fe_resteers",
                     "The number of resync branches per second.",
                     clr_r, "req/s"),
          ])

      def Taken() -> MetricGroup:
          """Retired taken branches and their mispredict percentage."""
          br = Event("ex_ret_brn_tkn")
          br_m_tk = Event("ex_ret_brn_tkn_misp")

          br_r = d_ratio(br, interval_sec)
          ins_r = d_ratio(ins, br)
          misp_r = d_ratio(br_m_tk, br)

          return MetricGroup("lpm_br_taken", [
              Metric("lpm_br_taken_retired",
                     "The number of taken branches that were retired per "
                     "second.",
                     br_r, "insn/s"),
              Metric(
                  "lpm_br_taken_mispred",
                  "The number of retired taken branch instructions that were "
                  "mispredicted as a percentage of all taken branches.",
                  misp_r, "100%"),
              Metric(
                  "lpm_br_taken_insn_between_branches",
                  "The number of instructions divided by the number of taken "
                  "branches.",
                  ins_r, "insn"),
          ])

      def Conditional() -> MetricGroup:
          """Retired conditional branches; mispredicts only for Zen 2."""
          br = Event("ex_ret_brn_cond", "ex_ret_cond")
          br_r = d_ratio(br, interval_sec)
          ins_r = d_ratio(ins, br)

          metrics = [
              Metric("lpm_br_cond_retired",
                     "Retired conditional branch instructions.",
                     br_r, "insn/s"),
              Metric("lpm_br_cond_insn_between_branches",
                     "The number of instructions divided by the number of "
                     "conditional branches.",
                     ins_r, "insn"),
          ]
          # The conditional mispredict event is only requested for Zen 2.
          if _zen_model == 2:
              br_m_cond = Event("ex_ret_cond_misp")
              misp_r = d_ratio(br_m_cond, br)
              metrics.append(
                  Metric("lpm_br_cond_mispred",
                         "Retired conditional branch instructions "
                         "mispredicted as a percentage of all conditional "
                         "branches.",
                         misp_r, "100%"))
          return MetricGroup("lpm_br_cond", metrics)

      def Fused() -> MetricGroup:
          """Retired fused branch instructions."""
          br = Event("ex_ret_fused_instr", "ex_ret_fus_brnch_inst")
          br_r = d_ratio(br, interval_sec)
          ins_r = d_ratio(ins, br)
          # The group is named after its lpm_br_fused_* metrics.  It used to
          # reuse "lpm_br_cond", which filed the fused metrics under the
          # conditional-branch group.
          return MetricGroup("lpm_br_fused", [
              Metric("lpm_br_fused_retired",
                     "Retired fused branch instructions per second.",
                     br_r, "insn/s"),
              Metric(
                  "lpm_br_fused_insn_between_branches",
                  "The number of instructions divided by the number of fused "
                  "branches.",
                  ins_r, "insn"),
          ])

      def Far() -> MetricGroup:
          """Retired far control transfers."""
          br = Event("ex_ret_brn_far")
          br_r = d_ratio(br, interval_sec)
          ins_r = d_ratio(ins, br)
          return MetricGroup("lpm_br_far", [
              Metric("lpm_br_far_retired",
                     "Retired far control transfers per second.",
                     br_r, "insn/s"),
              Metric(
                  "lpm_br_far_insn_between_branches",
                  "The number of instructions divided by the number of far "
                  "branches.",
                  ins_r, "insn"),
          ])

      return MetricGroup(
          "lpm_br",
          [Total(), Taken(), Conditional(), Fused(), Far()],
          description="breakdown of retired branch instructions")
  def AmdCtxSw() -> MetricGroup:
      """Build the "lpm_cs" group of context-switch metrics.

      The first metric is the context switch rate per second; the remaining
      ones divide another event count by the number of context switches.

      Returns:
        The "lpm_cs" MetricGroup.
      """
      cs = Event("context\\-switches")
      metrics = [
          Metric("lpm_cs_rate", "Context switches per second",
                 d_ratio(cs, interval_sec), "ctxsw/s")
      ]

      # (metric name, description, candidate event names, unit) for every
      # "<event> per context switch" metric, in output order.
      per_switch = (
          ("lpm_cs_instr", "Instructions per context switch",
           ("instructions",), "instr/cs"),
          ("lpm_cs_cycles", "Cycles per context switch",
           ("cycles",), "cycles/cs"),
          ("lpm_cs_loads", "Loads per context switch",
           ("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch"), "loads/cs"),
          ("lpm_cs_stores", "Stores per context switch",
           ("ls_dispatch.pure_st", "ls_dispatch.store_dispatch"),
           "stores/cs"),
          ("lpm_cs_br_taken", "Branches taken per context switch",
           ("ex_ret_brn_tkn",), "br_taken/cs"),
      )
      for metric_name, text, event_names, unit in per_switch:
          numerator = Event(*event_names)
          metrics.append(
              Metric(metric_name, text, d_ratio(numerator, cs), unit))

      summary = ("Number of context switches per second, instructions "
                 "retired & core cycles between context switches")
      return MetricGroup("lpm_cs", metrics, description=summary)
  def AmdDtlb() -> Optional[MetricGroup]:
      """Build the "lpm_dtlb" group of data TLB metrics.

      Reports L1 and L2 DTLB hit/miss percentages, a per-page-size L2
      breakdown, page table walk rates and instructions between misses.

      Returns:
        The "lpm_dtlb" MetricGroup, or None when _zen_model >= 4, for which
        these metrics are not generated.
      """
      if _zen_model >= 4:
          return None

      d_dat = Event("ls_dc_accesses")
      d_h4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit")
      # The coalesced-page events are only requested for Zen 2 and later; a
      # literal 0 stands in for them on Zen 1.
      d_hcoal = Event(
          "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit"
      ) if _zen_model >= 2 else 0
      d_h2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit")
      d_h1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit")

      d_m4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss")
      d_mcoal = Event(
          "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss"
      ) if _zen_model >= 2 else 0
      d_m2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss")
      d_m1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss")

      d_w0 = Event("ls_tablewalker.dc_type0")
      d_w1 = Event("ls_tablewalker.dc_type1")

      walks = d_w0 + d_w1
      walks_r = d_ratio(walks, interval_sec)
      ins_w = d_ratio(ins, walks)

      l1 = d_dat
      l1_r = d_ratio(l1, interval_sec)

      l2_hits = d_h4k + d_hcoal + d_h2m + d_h1g
      l2_miss = d_m4k + d_mcoal + d_m2m + d_m1g
      l2_r = d_ratio(l2_hits + l2_miss, interval_sec)

      # An L1 miss is counted as any L2 lookup (hit or miss) plus the walks.
      l1_miss = l2_hits + l2_miss + walks
      # max() here is the metric-expression helper imported from metric; it
      # clamps the difference at zero.
      l1_hits = max(l1 - l1_miss, 0)
      ins_l = d_ratio(ins, l1_miss)

      return MetricGroup("lpm_dtlb", [
          MetricGroup("lpm_dtlb_ov", [
              Metric("lpm_dtlb_ov_insn_bt_l1_miss",
                     "DTLB overview: instructions between l1 misses.",
                     ins_l, "insns"),
              Metric("lpm_dtlb_ov_insn_bt_walks",
                     "DTLB overview: instructions between dtlb page table "
                     "walks.",
                     ins_w, "insns"),
          ]),
          MetricGroup("lpm_dtlb_l1", [
              Metric("lpm_dtlb_l1_hits",
                     "DTLB L1 hits as percentage of all DTLB L1 accesses.",
                     d_ratio(l1_hits, l1), "100%"),
              Metric("lpm_dtlb_l1_miss",
                     "DTLB L1 misses as percentage of all DTLB L1 accesses.",
                     d_ratio(l1_miss, l1), "100%"),
              Metric("lpm_dtlb_l1_reqs", "DTLB L1 accesses per second.",
                     l1_r, "insns/s"),
          ]),
          MetricGroup("lpm_dtlb_l2", [
              Metric("lpm_dtlb_l2_hits",
                     "DTLB L2 hits as percentage of all DTLB L2 accesses.",
                     d_ratio(l2_hits, l2_hits + l2_miss), "100%"),
              Metric("lpm_dtlb_l2_miss",
                     "DTLB L2 misses as percentage of all DTLB L2 accesses.",
                     d_ratio(l2_miss, l2_hits + l2_miss), "100%"),
              Metric("lpm_dtlb_l2_reqs", "DTLB L2 accesses per second.",
                     l2_r, "insns/s"),
              MetricGroup("lpm_dtlb_l2_4kb", [
                  Metric(
                      "lpm_dtlb_l2_4kb_hits",
                      "DTLB L2 4kb page size hits as percentage of all DTLB "
                      "L2 4kb accesses.",
                      d_ratio(d_h4k, d_h4k + d_m4k), "100%"),
                  Metric(
                      "lpm_dtlb_l2_4kb_miss",
                      "DTLB L2 4kb page size misses as percentage of all "
                      "DTLB L2 4kb accesses.",
                      d_ratio(d_m4k, d_h4k + d_m4k), "100%"),
              ]),
              MetricGroup("lpm_dtlb_l2_coalesced", [
                  Metric(
                      "lpm_dtlb_l2_coal_hits",
                      "DTLB L2 coalesced page (16kb) hits as percentage of "
                      "all DTLB L2 coalesced accesses.",
                      d_ratio(d_hcoal, d_hcoal + d_mcoal), "100%"),
                  Metric(
                      "lpm_dtlb_l2_coal_miss",
                      "DTLB L2 coalesced page (16kb) misses as percentage of "
                      "all DTLB L2 coalesced accesses.",
                      d_ratio(d_mcoal, d_hcoal + d_mcoal), "100%"),
              ]),
              MetricGroup("lpm_dtlb_l2_2mb", [
                  Metric(
                      "lpm_dtlb_l2_2mb_hits",
                      "DTLB L2 2mb page size hits as percentage of all DTLB "
                      "L2 2mb accesses.",
                      d_ratio(d_h2m, d_h2m + d_m2m), "100%"),
                  Metric(
                      "lpm_dtlb_l2_2mb_miss",
                      "DTLB L2 2mb page size misses as percentage of all "
                      "DTLB L2 2mb accesses.",
                      d_ratio(d_m2m, d_h2m + d_m2m), "100%"),
              ]),
              MetricGroup("lpm_dtlb_l2_1g", [
                  Metric(
                      "lpm_dtlb_l2_1g_hits",
                      "DTLB L2 1gb page size hits as percentage of all DTLB "
                      "L2 1gb accesses.",
                      d_ratio(d_h1g, d_h1g + d_m1g), "100%"),
                  Metric(
                      "lpm_dtlb_l2_1g_miss",
                      "DTLB L2 1gb page size misses as percentage of all "
                      "DTLB L2 1gb accesses.",
                      d_ratio(d_m1g, d_h1g + d_m1g), "100%"),
              ]),
          ]),
          MetricGroup("lpm_dtlb_walks", [
              Metric("lpm_dtlb_walks_reqs",
                     "DTLB page table walks per second.",
                     walks_r, "walks/s"),
          ]),
      ], description="Data TLB metrics")
  def AmdItlb() -> MetricGroup:
      """Build the "lpm_itlb" group of instruction TLB metrics.

      L2 ITLB hit/miss metrics are always produced.  The L1 sub-group is
      built only when _zen_model <= 3, because it needs the "ic_fw32" event.

      Returns:
        The "lpm_itlb" MetricGroup.
      """
      l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit")
      l2m = Event("l2_itlb_misses")
      l2r = l2h + l2m

      # Every L2 ITLB lookup (hit or miss) is counted as an L1 ITLB miss.
      l1m = l2r

      itlb_l1_mg = None
      if _zen_model <= 3:
          l1r = Event("ic_fw32")
          # max() is the metric-expression helper imported from metric.
          l1h = max(l1r - l1m, 0)
          itlb_l1_mg = MetricGroup("lpm_itlb_l1", [
              Metric("lpm_itlb_l1_hits",
                     "L1 ITLB hits as a percentage of L1 ITLB accesses.",
                     d_ratio(l1h, l1h + l1m), "100%"),
              Metric("lpm_itlb_l1_miss",
                     "L1 ITLB misses as a percentage of L1 ITLB accesses.",
                     d_ratio(l1m, l1h + l1m), "100%"),
              Metric("lpm_itlb_l1_reqs",
                     "The number of 32B fetch windows transferred from IC "
                     "pipe to DE instruction decoder per second.",
                     d_ratio(l1r, interval_sec), "windows/sec"),
          ])

      return MetricGroup("lpm_itlb", [
          MetricGroup("lpm_itlb_ov", [
              Metric("lpm_itlb_ov_insn_bt_l1_miss",
                     "Number of instructions between l1 misses",
                     d_ratio(ins, l1m), "insns"),
              Metric("lpm_itlb_ov_insn_bt_l2_miss",
                     "Number of instructions between l2 misses",
                     d_ratio(ins, l2m), "insns"),
          ]),
          # May be None for Zen 4 and later; MetricGroup is expected to
          # tolerate None entries, as main() also relies on for AmdDtlb().
          itlb_l1_mg,
          MetricGroup("lpm_itlb_l2", [
              Metric("lpm_itlb_l2_hits",
                     "L2 ITLB hits as a percentage of all L2 ITLB accesses.",
                     d_ratio(l2h, l2r), "100%"),
              Metric("lpm_itlb_l2_miss",
                     "L2 ITLB misses as a percentage of all L2 ITLB "
                     "accesses.",
                     d_ratio(l2m, l2r), "100%"),
              Metric("lpm_itlb_l2_reqs", "ITLB accesses per second.",
                     d_ratio(l2r, interval_sec), "accesses/sec"),
          ]),
      ], description="Instruction TLB breakdown")
  def AmdLdSt() -> MetricGroup:
      """Build the "lpm_ldst" group breaking down load/store dispatch.

      Besides total rates and instruction-mix percentages, the load and store
      events are re-sampled with cmask=1, 2 and 3.  Differences between
      adjacent cmask counts give the "exactly 1", "exactly 2" and
      "3 or more" buckets, reported relative to "ls_not_halted_cyc".

      Returns:
        The "lpm_ldst" MetricGroup.
      """
      ldst_ld = Event("ls_dispatch.pure_ld", "ls_dispatch.ld_dispatch")
      ldst_st = Event("ls_dispatch.pure_st", "ls_dispatch.store_dispatch")
      # The base event is formatted into a new event string carrying a cmask
      # modifier (relies on the Event's string form being its event name).
      ldst_ldc1 = Event(f"{ldst_ld}/cmask=1/")
      ldst_stc1 = Event(f"{ldst_st}/cmask=1/")
      ldst_ldc2 = Event(f"{ldst_ld}/cmask=2/")
      ldst_stc2 = Event(f"{ldst_st}/cmask=2/")
      ldst_ldc3 = Event(f"{ldst_ld}/cmask=3/")
      ldst_stc3 = Event(f"{ldst_st}/cmask=3/")
      ldst_cyc = Event("ls_not_halted_cyc")

      ld_rate = d_ratio(ldst_ld, interval_sec)
      st_rate = d_ratio(ldst_st, interval_sec)

      # Adjacent cmask counts are subtracted and clamped at zero with the
      # metric-expression max() helper; the cmask=3 count is used as-is.
      ld_v1 = max(ldst_ldc1 - ldst_ldc2, 0)
      ld_v2 = max(ldst_ldc2 - ldst_ldc3, 0)
      ld_v3 = ldst_ldc3

      st_v1 = max(ldst_stc1 - ldst_stc2, 0)
      st_v2 = max(ldst_stc2 - ldst_stc3, 0)
      st_v3 = ldst_stc3

      return MetricGroup("lpm_ldst", [
          MetricGroup("lpm_ldst_total", [
              Metric("lpm_ldst_total_ld",
                     "Number of loads dispatched per second.",
                     ld_rate, "insns/sec"),
              Metric("lpm_ldst_total_st",
                     "Number of stores dispatched per second.",
                     st_rate, "insns/sec"),
          ]),
          MetricGroup("lpm_ldst_percent_insn", [
              Metric("lpm_ldst_percent_insn_ld",
                     "Load instructions as a percentage of all instructions.",
                     d_ratio(ldst_ld, ins), "100%"),
              Metric("lpm_ldst_percent_insn_st",
                     "Store instructions as a percentage of all "
                     "instructions.",
                     d_ratio(ldst_st, ins), "100%"),
          ]),
          MetricGroup("lpm_ldst_ret_loads_per_cycle", [
              Metric(
                  "lpm_ldst_ret_loads_per_cycle_1",
                  "Load instructions retiring in 1 cycle as a percentage of "
                  "all unhalted cycles.",
                  d_ratio(ld_v1, ldst_cyc), "100%"),
              Metric(
                  "lpm_ldst_ret_loads_per_cycle_2",
                  "Load instructions retiring in 2 cycles as a percentage of "
                  "all unhalted cycles.",
                  d_ratio(ld_v2, ldst_cyc), "100%"),
              Metric(
                  "lpm_ldst_ret_loads_per_cycle_3",
                  "Load instructions retiring in 3 or more cycles as a "
                  "percentage of all unhalted cycles.",
                  d_ratio(ld_v3, ldst_cyc), "100%"),
          ]),
          MetricGroup("lpm_ldst_ret_stores_per_cycle", [
              Metric(
                  "lpm_ldst_ret_stores_per_cycle_1",
                  "Store instructions retiring in 1 cycle as a percentage of "
                  "all unhalted cycles.",
                  d_ratio(st_v1, ldst_cyc), "100%"),
              Metric(
                  "lpm_ldst_ret_stores_per_cycle_2",
                  "Store instructions retiring in 2 cycles as a percentage "
                  "of all unhalted cycles.",
                  d_ratio(st_v2, ldst_cyc), "100%"),
              Metric(
                  "lpm_ldst_ret_stores_per_cycle_3",
                  "Store instructions retiring in 3 or more cycles as a "
                  "percentage of all unhalted cycles.",
                  d_ratio(st_v3, ldst_cyc), "100%"),
          ]),
          MetricGroup("lpm_ldst_insn_bt", [
              Metric("lpm_ldst_insn_bt_ld",
                     "Number of instructions between loads.",
                     d_ratio(ins, ldst_ld), "insns"),
              Metric("lpm_ldst_insn_bt_st",
                     "Number of instructions between stores.",
                     d_ratio(ins, ldst_st), "insns"),
          ]),
      ], description="Breakdown of load/store instructions")
  def AmdUpc() -> Metric:
      """Build "lpm_upc": retired micro-ops divided by SMT-scaled cycles.

      Returns:
        The "lpm_upc" Metric.
      """
      retired_ops = Event("ex_ret_ops", "ex_ret_cops")
      return Metric(
          "lpm_upc",
          "Micro-ops retired per core cycle (higher is better)",
          d_ratio(retired_ops, smt_cycles),
          "uops/cycle")
  def Idle() -> Metric:
      """Build "lpm_idle": share of TSC cycles not covered by MPERF.

      Returns:
        The "lpm_idle" Metric.
      """
      mperf = Event("msr/mperf/")
      tsc = Event("msr/tsc/")
      # max() is the metric-expression helper imported from metric; it keeps
      # the difference from going negative.
      idle_cycles = max(tsc - mperf, 0)
      idle_fraction = d_ratio(idle_cycles, tsc)
      return Metric(
          "lpm_idle",
          "Percentage of total wallclock cycles where CPUs are in low power "
          "state (C1 or deeper sleep state)",
          idle_fraction,
          "100%")
  def Rapl() -> MetricGroup:
      """Estimate processor socket power consumption.

      Built on the running average power limit (RAPL) driver's package energy
      event; the metric is the scaled energy count divided by the interval
      length, i.e. Watts (joules/second).

      Returns:
        The "lpm_cpu_power" MetricGroup.
      """
      # energy-pkg is currently the only RAPL event supported by AMD:
      # https://lore.kernel.org/lkml/20220105185659.643355-1-eranian@google.com/
      pkg = Event("power/energy\\-pkg/")
      # Yield NaN instead of the event when the event is not available.
      cond_pkg = Select(pkg, has_event(pkg), math.nan)
      # Numerically 2**-32; presumably converts raw counts to joules.
      scale = 2.3283064365386962890625e-10
      watts = d_ratio(cond_pkg * scale, interval_sec)
      return MetricGroup(
          "lpm_cpu_power",
          [Metric("lpm_cpu_power_pkg", "", watts, "Watts")],
          description="Processor socket power consumption estimates")
  def UncoreL3():
      """Build the "lpm_l3" group: L3 access rate plus hit and miss ratios.

      Returns:
        The "lpm_l3" MetricGroup.
      """
      lookups = Event("l3_lookup_state.all_coherent_accesses_to_l3",
                      "l3_lookup_state.all_l3_req_typs")
      misses = Event("l3_lookup_state.l3_miss",
                     "l3_comb_clstr_state.request_miss")
      # Use at least the miss count as the access count so the hit count
      # below cannot go negative (max() is the metric-expression helper).
      accesses = max(lookups, misses)
      hit_count = accesses - misses

      l3_metrics = [
          Metric("lpm_l3_accesses", "L3 victim cache accesses",
                 d_ratio(accesses, interval_sec), "accesses/sec"),
          Metric("lpm_l3_hits", "L3 victim cache hit rate",
                 d_ratio(hit_count, accesses), "100%"),
          Metric("lpm_l3_miss", "L3 victim cache miss rate",
                 d_ratio(misses, accesses), "100%"),
      ]
      return MetricGroup("lpm_l3", l3_metrics,
                         description="L3 cache breakdown per CCX")
  def main() -> None:
      """Generate the AMD metric JSON for one Zen model and print it.

      Command line:
        model          Model directory name of the form "amdzen<N>".
        events_path    Root of the tree containing the architecture
                       directories with the event json files.
        -metricgroups  Print metric group descriptions instead of metrics.

      The parsed arguments are stored in the module-level _args and the Zen
      generation number in _zen_model before the metric builders run, since
      several of them branch on _zen_model.

      Exits through argparse's error path when the model name is not of the
      form "amdzen<N>".
      """
      global _args
      global _zen_model

      def dir_path(path: str) -> str:
          """Validate path is a directory for argparse."""
          if os.path.isdir(path):
              return path
          raise argparse.ArgumentTypeError(
              f'\'{path}\' is not a valid directory')

      parser = argparse.ArgumentParser(description="AMD perf json generator")
      parser.add_argument(
          "-metricgroups", help="Generate metricgroups data",
          action='store_true')
      parser.add_argument("model", help="e.g. amdzen[123]")
      parser.add_argument(
          'events_path',
          type=dir_path,
          help='Root of tree containing architecture directories containing '
               'json files'
      )
      _args = parser.parse_args()

      # Validate the model name up front instead of slicing blindly: a name
      # without the expected prefix or numeric suffix would otherwise raise a
      # bare ValueError, or silently yield a wrong generation number.
      model_prefix = "amdzen"
      generation = _args.model[len(model_prefix):]
      if not _args.model.startswith(model_prefix) or not generation.isdecimal():
          parser.error(
              f"unsupported model '{_args.model}', expected amdzen<N>")

      directory = f"{_args.events_path}/x86/{_args.model}/"
      LoadEvents(directory)
      _zen_model = int(generation)

      all_metrics = MetricGroup("", [
          AmdBr(),
          AmdCtxSw(),
          AmdDtlb(),  # None for Zen 4 and later.
          AmdItlb(),
          AmdLdSt(),
          AmdUpc(),
          Cycles(),
          Idle(),
          Rapl(),
          UncoreL3(),
      ])

      if _args.metricgroups:
          print(JsonEncodeMetricGroupDescriptions(all_metrics))
      else:
          print(JsonEncodeMetric(all_metrics))


  if __name__ == '__main__':
      main()