record.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. #!/bin/bash
  2. # perf record tests (exclusive)
  3. # SPDX-License-Identifier: GPL-2.0
  4. set -e
  5. shelldir=$(dirname "$0")
  6. # shellcheck source=lib/waiting.sh
  7. . "${shelldir}"/lib/waiting.sh
  8. # shellcheck source=lib/perf_has_symbol.sh
  9. . "${shelldir}"/lib/perf_has_symbol.sh
  10. testsym="test_loop"
  11. testsym2="brstack"
  12. skip_test_missing_symbol ${testsym}
  13. skip_test_missing_symbol ${testsym2}
  14. err=0
  15. perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
  16. script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script)
  17. testprog="perf test -w thloop"
  18. cpu_pmu_dir="/sys/bus/event_source/devices/cpu*"
  19. br_cntr_file="/caps/branch_counter_nr"
  20. br_cntr_output="branch stack counters"
  21. br_cntr_script_output="br_cntr: A"
  22. default_fd_limit=$(ulimit -Sn)
  23. # With option --threads=cpu the number of open file descriptors should be
  24. # equal to sum of: nmb_cpus * nmb_events (2+dummy),
  25. # nmb_threads for perf.data.n (equal to nmb_cpus) and
  26. # 2*nmb_cpus of pipes = 4*nmb_cpus (each pipe has 2 ends)
  27. # All together it needs 8*nmb_cpus file descriptors plus some are also used
  28. # outside of testing, thus raising the limit to 16*nmb_cpus
  29. min_fd_limit=$(($(getconf _NPROCESSORS_ONLN) * 16))
  30. cleanup() {
  31. rm -f "${perfdata}"
  32. rm -f "${perfdata}".old
  33. rm -f "${script_output}"
  34. trap - EXIT TERM INT
  35. }
  36. trap_cleanup() {
  37. echo "Unexpected signal in ${FUNCNAME[1]}"
  38. cleanup
  39. exit 1
  40. }
  41. trap trap_cleanup EXIT TERM INT
  42. test_per_thread() {
  43. echo "Basic --per-thread mode test"
  44. if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
  45. then
  46. echo "Per-thread record [Skipped event not supported]"
  47. return
  48. fi
  49. if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null
  50. then
  51. echo "Per-thread record [Failed record]"
  52. err=1
  53. return
  54. fi
  55. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  56. then
  57. echo "Per-thread record [Failed missing output]"
  58. err=1
  59. return
  60. fi
  61. # run the test program in background (for 30 seconds)
  62. ${testprog} 30 &
  63. TESTPID=$!
  64. rm -f "${perfdata}"
  65. wait_for_threads ${TESTPID} 2
  66. perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null
  67. kill ${TESTPID}
  68. if [ ! -e "${perfdata}" ]
  69. then
  70. echo "Per-thread record [Failed record -p]"
  71. err=1
  72. return
  73. fi
  74. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  75. then
  76. echo "Per-thread record [Failed -p missing output]"
  77. err=1
  78. return
  79. fi
  80. echo "Basic --per-thread mode test [Success]"
  81. }
  82. test_register_capture() {
  83. echo "Register capture test"
  84. if ! perf list pmu | grep -q 'br_inst_retired.near_call'
  85. then
  86. echo "Register capture test [Skipped missing event]"
  87. return
  88. fi
  89. if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
  90. then
  91. echo "Register capture test [Skipped missing registers]"
  92. return
  93. fi
  94. if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call \
  95. -c 1000 --per-thread ${testprog} 2> /dev/null \
  96. | perf script -F ip,sym,iregs -i - 2> /dev/null \
  97. | grep -q "DI:"
  98. then
  99. echo "Register capture test [Failed missing output]"
  100. err=1
  101. return
  102. fi
  103. echo "Register capture test [Success]"
  104. }
  105. test_system_wide() {
  106. echo "Basic --system-wide mode test"
  107. if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null
  108. then
  109. echo "System-wide record [Skipped not supported]"
  110. return
  111. fi
  112. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  113. then
  114. echo "System-wide record [Failed missing output]"
  115. err=1
  116. return
  117. fi
  118. if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \
  119. -o "${perfdata}" ${testprog} 2> /dev/null
  120. then
  121. echo "System-wide record [Failed record --threads option]"
  122. err=1
  123. return
  124. fi
  125. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  126. then
  127. echo "System-wide record [Failed --threads missing output]"
  128. err=1
  129. return
  130. fi
  131. echo "Basic --system-wide mode test [Success]"
  132. }
  133. test_workload() {
  134. echo "Basic target workload test"
  135. if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null
  136. then
  137. echo "Workload record [Failed record]"
  138. err=1
  139. return
  140. fi
  141. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  142. then
  143. echo "Workload record [Failed missing output]"
  144. err=1
  145. return
  146. fi
  147. if ! perf record -e cpu-clock,cs --threads=package \
  148. -o "${perfdata}" ${testprog} 2> /dev/null
  149. then
  150. echo "Workload record [Failed record --threads option]"
  151. err=1
  152. return
  153. fi
  154. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  155. then
  156. echo "Workload record [Failed --threads missing output]"
  157. err=1
  158. return
  159. fi
  160. echo "Basic target workload test [Success]"
  161. }
  162. test_branch_counter() {
  163. echo "Branch counter test"
  164. # Check if the branch counter feature is supported
  165. for dir in $cpu_pmu_dir
  166. do
  167. if [ ! -e "$dir$br_cntr_file" ]
  168. then
  169. echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]"
  170. return
  171. fi
  172. done
  173. if ! perf record -o "${perfdata}" -e "{branches:p,instructions}" -j any,counter ${testprog} 2> /dev/null
  174. then
  175. echo "Branch counter record test [Failed record]"
  176. err=1
  177. return
  178. fi
  179. if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output"
  180. then
  181. echo "Branch counter report test [Failed missing output]"
  182. err=1
  183. return
  184. fi
  185. if ! perf script -i "${perfdata}" -F +brstackinsn,+brcntr | grep -q "$br_cntr_script_output"
  186. then
  187. echo " Branch counter script test [Failed missing output]"
  188. err=1
  189. return
  190. fi
  191. echo "Branch counter test [Success]"
  192. }
  193. test_cgroup() {
  194. echo "Cgroup sampling test"
  195. if ! perf record -aB --synth=cgroup --all-cgroups -o "${perfdata}" ${testprog} 2> /dev/null
  196. then
  197. echo "Cgroup sampling [Skipped not supported]"
  198. return
  199. fi
  200. if ! perf report -i "${perfdata}" -D | grep -q "CGROUP"
  201. then
  202. echo "Cgroup sampling [Failed missing output]"
  203. err=1
  204. return
  205. fi
  206. if ! perf script -i "${perfdata}" -F cgroup | grep -q -v "unknown"
  207. then
  208. echo "Cgroup sampling [Failed cannot resolve cgroup names]"
  209. err=1
  210. return
  211. fi
  212. echo "Cgroup sampling test [Success]"
  213. }
  214. test_uid() {
  215. echo "Uid sampling test"
  216. if ! perf record -aB --synth=no --uid "$(id -u)" -o "${perfdata}" ${testprog} \
  217. > "${script_output}" 2>&1
  218. then
  219. if grep -q "libbpf.*EPERM" "${script_output}"
  220. then
  221. echo "Uid sampling [Skipped permissions]"
  222. return
  223. else
  224. echo "Uid sampling [Failed to record]"
  225. err=1
  226. # cat "${script_output}"
  227. return
  228. fi
  229. fi
  230. if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
  231. then
  232. echo "Uid sampling [Failed missing output]"
  233. err=1
  234. return
  235. fi
  236. echo "Uid sampling test [Success]"
  237. }
  238. test_leader_sampling() {
  239. echo "Basic leader sampling test"
  240. events="{cycles,cycles}:Su"
  241. [ "$(uname -m)" = "s390x" ] && {
  242. [ ! -d /sys/devices/cpum_sf ] && {
  243. echo "No CPUMF [Skipped record]"
  244. return
  245. }
  246. events="{cpum_sf/SF_CYCLES_BASIC/,cycles}:Su"
  247. perf record -o "${perfdata}" -e "$events" -- perf test -w brstack 2> /dev/null
  248. # Perf grouping might be unsupported, depends on version.
  249. [ "$?" -ne 0 ] && {
  250. echo "Grouping not support [Skipped record]"
  251. return
  252. }
  253. }
  254. if ! perf record -o "${perfdata}" -e "$events" -- \
  255. perf test -w brstack 2> /dev/null
  256. then
  257. echo "Leader sampling [Failed record]"
  258. err=1
  259. return
  260. fi
  261. perf script -i "${perfdata}" | grep brstack > $script_output
  262. # Check if the two instruction counts are equal in each record.
  263. # However, the throttling code doesn't consider event grouping. During throttling, only the
  264. # leader is stopped, causing the slave's counts significantly higher. To temporarily solve this,
  265. # let's set the tolerance rate to 80%.
  266. # TODO: Revert the code for tolerance once the throttling mechanism is fixed.
  267. index=0
  268. valid_counts=0
  269. invalid_counts=0
  270. tolerance_rate=0.8
  271. while IFS= read -r line
  272. do
  273. cycles=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="cycles:") print $(i-1)}')
  274. if [ $(($index%2)) -ne 0 ] && [ ${cycles}x != ${prev_cycles}x ]
  275. then
  276. invalid_counts=$(($invalid_counts+1))
  277. else
  278. valid_counts=$(($valid_counts+1))
  279. fi
  280. index=$(($index+1))
  281. prev_cycles=$cycles
  282. done < "${script_output}"
  283. total_counts=$(bc <<< "$invalid_counts+$valid_counts")
  284. if (( $(bc <<< "$total_counts <= 0") ))
  285. then
  286. echo "Leader sampling [No sample generated]"
  287. err=1
  288. return
  289. fi
  290. isok=$(bc <<< "scale=2; if (($invalid_counts/$total_counts) < (1-$tolerance_rate)) { 0 } else { 1 };")
  291. if [ $isok -eq 1 ]
  292. then
  293. echo "Leader sampling [Failed inconsistent cycles count]"
  294. err=1
  295. else
  296. echo "Basic leader sampling test [Success]"
  297. fi
  298. }
  299. test_topdown_leader_sampling() {
  300. echo "Topdown leader sampling test"
  301. if ! perf stat -e "{slots,topdown-retiring}" true 2> /dev/null
  302. then
  303. echo "Topdown leader sampling [Skipped event parsing failed]"
  304. return
  305. fi
  306. if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null
  307. then
  308. echo "Topdown leader sampling [Failed topdown events not reordered correctly]"
  309. err=1
  310. return
  311. fi
  312. echo "Topdown leader sampling test [Success]"
  313. }
  314. test_precise_max() {
  315. local -i skipped=0
  316. echo "precise_max attribute test"
  317. # Just to make sure event cycles is supported for sampling
  318. if perf record -o "${perfdata}" -e "cycles" true 2> /dev/null
  319. then
  320. if ! perf record -o "${perfdata}" -e "cycles:P" true 2> /dev/null
  321. then
  322. echo "precise_max attribute [Failed cycles:P event]"
  323. err=1
  324. return
  325. fi
  326. else
  327. echo "precise_max attribute [Skipped no cycles:P event]"
  328. ((skipped+=1))
  329. fi
  330. # On s390 event instructions is not supported for perf record
  331. if perf record -o "${perfdata}" -e "instructions" true 2> /dev/null
  332. then
  333. # On AMD, cycles and instructions events are treated differently
  334. if ! perf record -o "${perfdata}" -e "instructions:P" true 2> /dev/null
  335. then
  336. echo "precise_max attribute [Failed instructions:P event]"
  337. err=1
  338. return
  339. fi
  340. else
  341. echo "precise_max attribute [Skipped no instructions:P event]"
  342. ((skipped+=1))
  343. fi
  344. if [ $skipped -eq 2 ]
  345. then
  346. echo "precise_max attribute [Skipped no hardware events]"
  347. else
  348. echo "precise_max attribute test [Success]"
  349. fi
  350. }
  351. test_callgraph() {
  352. echo "Callgraph test"
  353. case $(uname -m)
  354. in s390x)
  355. cmd_flags="--call-graph dwarf -e cpu-clock";;
  356. *)
  357. cmd_flags="-g";;
  358. esac
  359. if ! perf record -o "${perfdata}" $cmd_flags perf test -w brstack
  360. then
  361. echo "Callgraph test [Failed missing output]"
  362. err=1
  363. return
  364. fi
  365. if ! perf report -i "${perfdata}" 2>&1 | grep "${testsym2}"
  366. then
  367. echo "Callgraph test [Failed missing symbol]"
  368. err=1
  369. return
  370. fi
  371. echo "Callgraph test [Success]"
  372. }
  373. test_ratio_to_prev() {
  374. echo "ratio-to-prev test"
  375. if ! perf record -o /dev/null -e "{instructions, cycles/period=100000,ratio-to-prev=0.5/}" \
  376. true 2> /dev/null
  377. then
  378. echo "ratio-to-prev [Skipped not supported]"
  379. return
  380. fi
  381. if ! perf record -o /dev/null -e "instructions, cycles/period=100000,ratio-to-prev=0.5/" \
  382. true |& grep -q 'Invalid use of ratio-to-prev term without preceding element in group'
  383. then
  384. echo "ratio-to-prev test [Failed elements must be in same group]"
  385. err=1
  386. return
  387. fi
  388. if ! perf record -o /dev/null -e "{instructions,dummy,cycles/period=100000,ratio-to-prev=0.5/}" \
  389. true |& grep -q 'must have same PMU'
  390. then
  391. echo "ratio-to-prev test [Failed elements must have same PMU]"
  392. err=1
  393. return
  394. fi
  395. if ! perf record -o /dev/null -e "{instructions,cycles/ratio-to-prev=0.5/}" \
  396. true |& grep -q 'Event period term or count (-c) must be set when using ratio-to-prev term.'
  397. then
  398. echo "ratio-to-prev test [Failed period must be set]"
  399. err=1
  400. return
  401. fi
  402. if ! perf record -o /dev/null -e "{cycles/ratio-to-prev=0.5/}" \
  403. true |& grep -q 'Invalid use of ratio-to-prev term without preceding element in group'
  404. then
  405. echo "ratio-to-prev test [Failed need 2+ events]"
  406. err=1
  407. return
  408. fi
  409. echo "Basic ratio-to-prev record test [Success]"
  410. }
  411. # raise the limit of file descriptors to minimum
  412. if [[ $default_fd_limit -lt $min_fd_limit ]]; then
  413. ulimit -Sn $min_fd_limit
  414. fi
  415. test_per_thread
  416. test_register_capture
  417. test_system_wide
  418. test_workload
  419. test_branch_counter
  420. test_cgroup
  421. test_uid
  422. test_leader_sampling
  423. test_topdown_leader_sampling
  424. test_precise_max
  425. test_callgraph
  426. test_ratio_to_prev
  427. # restore the default value
  428. ulimit -Sn $default_fd_limit
  429. cleanup
  430. exit $err