perf_metric_validation.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. # SPDX-License-Identifier: GPL-2.0
import argparse
import csv
import json
import re
import subprocess
import sys
from pathlib import Path
  8. class TestError:
  9. def __init__(self, metric: list[str], wl: str, value: list[float], low: float, up=float('nan'), description=str()):
  10. self.metric: list = metric # multiple metrics in relationship type tests
  11. self.workloads = [wl] # multiple workloads possible
  12. self.collectedValue: list = value
  13. self.valueLowBound = low
  14. self.valueUpBound = up
  15. self.description = description
  16. def __repr__(self) -> str:
  17. if len(self.metric) > 1:
  18. return "\nMetric Relationship Error: \tThe collected value of metric {0}\n\
  19. \tis {1} in workload(s): {2} \n\
  20. \tbut expected value range is [{3}, {4}]\n\
  21. \tRelationship rule description: \'{5}\'".format(self.metric, self.collectedValue, self.workloads,
  22. self.valueLowBound, self.valueUpBound, self.description)
  23. elif len(self.collectedValue) == 0:
  24. return "\nNo Metric Value Error: \tMetric {0} returns with no value \n\
  25. \tworkload(s): {1}".format(self.metric, self.workloads)
  26. else:
  27. return "\nWrong Metric Value Error: \tThe collected value of metric {0}\n\
  28. \tis {1} in workload(s): {2}\n\
  29. \tbut expected value range is [{3}, {4}]"\
  30. .format(self.metric, self.collectedValue, self.workloads,
  31. self.valueLowBound, self.valueUpBound)
  32. class Validator:
  33. def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
  34. workload='true', metrics='', cputype='cpu'):
  35. self.rulefname = rulefname
  36. self.reportfname = reportfname
  37. self.rules = None
  38. self.collectlist: str = metrics
  39. self.metrics = self.__set_metrics(metrics)
  40. self.skiplist = set()
  41. self.tolerance = t
  42. self.cputype = cputype
  43. self.workloads = [x for x in workload.split(",") if x]
  44. self.wlidx = 0 # idx of current workloads
  45. self.allresults = dict() # metric results of all workload
  46. self.alltotalcnt = dict()
  47. self.allpassedcnt = dict()
  48. self.results = dict() # metric results of current workload
  49. # vars for test pass/failure statistics
  50. # metrics with no results or negative results, neg result counts failed tests
  51. self.ignoremetrics = set()
  52. self.totalcnt = 0
  53. self.passedcnt = 0
  54. # vars for errors
  55. self.errlist = list()
  56. # vars for Rule Generator
  57. self.pctgmetrics = set() # Percentage rule
  58. # vars for debug
  59. self.datafname = datafname
  60. self.debug = debug
  61. self.fullrulefname = fullrulefname
  62. def __set_metrics(self, metrics=''):
  63. if metrics != '':
  64. return set(metrics.split(","))
  65. else:
  66. return set()
  67. def read_json(self, filename: str) -> dict:
  68. try:
  69. with open(Path(filename).resolve(), "r") as f:
  70. data = json.loads(f.read())
  71. except OSError as e:
  72. print(f"Error when reading file {e}")
  73. sys.exit()
  74. return data
  75. def json_dump(self, data, output_file):
  76. parent = Path(output_file).parent
  77. if not parent.exists():
  78. parent.mkdir(parents=True)
  79. with open(output_file, "w+") as output_file:
  80. json.dump(data,
  81. output_file,
  82. ensure_ascii=True,
  83. indent=4)
  84. def get_results(self, idx: int = 0):
  85. return self.results.get(idx)
  86. def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
  87. """
  88. Get bounds and tolerance from lb, ub, and error.
  89. If missing lb, use 0.0; missing ub, use float('inf); missing error, use self.tolerance.
  90. @param lb: str/float, lower bound
  91. @param ub: str/float, upper bound
  92. @param error: float/str, error tolerance
  93. @returns: lower bound, return inf if the lower bound is a metric value and is not collected
  94. upper bound, return -1 if the upper bound is a metric value and is not collected
  95. tolerance, denormalized base on upper bound value
  96. """
  97. # init ubv and lbv to invalid values
  98. def get_bound_value(bound, initval, ridx):
  99. val = initval
  100. if isinstance(bound, int) or isinstance(bound, float):
  101. val = bound
  102. elif isinstance(bound, str):
  103. if bound == '':
  104. val = float("inf")
  105. elif bound in alias:
  106. vall = self.get_value(alias[ub], ridx)
  107. if vall:
  108. val = vall[0]
  109. elif bound.replace('.', '1').isdigit():
  110. val = float(bound)
  111. else:
  112. print("Wrong bound: {0}".format(bound))
  113. else:
  114. print("Wrong bound: {0}".format(bound))
  115. return val
  116. ubv = get_bound_value(ub, -1, ridx)
  117. lbv = get_bound_value(lb, float('inf'), ridx)
  118. t = get_bound_value(error, self.tolerance, ridx)
  119. # denormalize error threshold
  120. denormerr = t * ubv / 100 if ubv != 100 and ubv > 0 else t
  121. return lbv, ubv, denormerr
  122. def get_value(self, name: str, ridx: int = 0) -> list:
  123. """
  124. Get value of the metric from self.results.
  125. If result of this metric is not provided, the metric name will be added into self.ignoremetics.
  126. All future test(s) on this metric will fail.
  127. @param name: name of the metric
  128. @returns: list with value found in self.results; list is empty when value is not found.
  129. """
  130. results = []
  131. data = self.results[ridx] if ridx in self.results else self.results[0]
  132. if name not in self.ignoremetrics:
  133. if name in data:
  134. results.append(data[name])
  135. elif name.replace('.', '1').isdigit():
  136. results.append(float(name))
  137. else:
  138. self.ignoremetrics.add(name)
  139. return results
  140. def check_bound(self, val, lb, ub, err):
  141. return True if val <= ub + err and val >= lb - err else False
  142. # Positive Value Sanity check
  143. def pos_val_test(self):
  144. """
  145. Check if metrics value are non-negative.
  146. One metric is counted as one test.
  147. Failure: when metric value is negative or not provided.
  148. Metrics with negative value will be added into self.ignoremetrics.
  149. """
  150. negmetric = dict()
  151. pcnt = 0
  152. tcnt = 0
  153. rerun = list()
  154. results = self.get_results()
  155. if not results:
  156. return
  157. for name, val in results.items():
  158. if val < 0:
  159. negmetric[name] = val
  160. rerun.append(name)
  161. else:
  162. pcnt += 1
  163. tcnt += 1
  164. # The first round collect_perf() run these metrics with simple workload
  165. # "true". We give metrics a second chance with a longer workload if less
  166. # than 20 metrics failed positive test.
  167. if len(rerun) > 0 and len(rerun) < 20:
  168. second_results = dict()
  169. self.second_test(rerun, second_results)
  170. for name, val in second_results.items():
  171. if name not in negmetric:
  172. continue
  173. if val >= 0:
  174. del negmetric[name]
  175. pcnt += 1
  176. if len(negmetric.keys()):
  177. self.ignoremetrics.update(negmetric.keys())
  178. self.errlist.extend(
  179. [TestError([m], self.workloads[self.wlidx], negmetric[m], 0) for m in negmetric.keys()])
  180. return
  181. def evaluate_formula(self, formula: str, alias: dict, ridx: int = 0):
  182. """
  183. Evaluate the value of formula.
  184. @param formula: the formula to be evaluated
  185. @param alias: the dict has alias to metric name mapping
  186. @returns: value of the formula is success; -1 if the one or more metric value not provided
  187. """
  188. stack = []
  189. b = 0
  190. errs = []
  191. sign = "+"
  192. f = str()
  193. # TODO: support parenthesis?
  194. for i in range(len(formula)):
  195. if i+1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
  196. s = alias[formula[b:i]] if i + \
  197. 1 < len(formula) else alias[formula[b:]]
  198. v = self.get_value(s, ridx)
  199. if not v:
  200. errs.append(s)
  201. else:
  202. f = f + "{0}(={1:.4f})".format(s, v[0])
  203. if sign == "*":
  204. stack[-1] = stack[-1] * v
  205. elif sign == "/":
  206. stack[-1] = stack[-1] / v
  207. elif sign == '-':
  208. stack.append(-v[0])
  209. else:
  210. stack.append(v[0])
  211. if i + 1 < len(formula):
  212. sign = formula[i]
  213. f += sign
  214. b = i + 1
  215. if len(errs) > 0:
  216. return -1, "Metric value missing: "+','.join(errs)
  217. val = sum(stack)
  218. return val, f
  219. # Relationships Tests
  220. def relationship_test(self, rule: dict):
  221. """
  222. Validate if the metrics follow the required relationship in the rule.
  223. eg. lower_bound <= eval(formula)<= upper_bound
  224. One rule is counted as ont test.
  225. Failure: when one or more metric result(s) not provided, or when formula evaluated outside of upper/lower bounds.
  226. @param rule: dict with metric name(+alias), formula, and required upper and lower bounds.
  227. """
  228. alias = dict()
  229. for m in rule['Metrics']:
  230. alias[m['Alias']] = m['Name']
  231. lbv, ubv, t = self.get_bounds(
  232. rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
  233. val, f = self.evaluate_formula(
  234. rule['Formula'], alias, ridx=rule['RuleIndex'])
  235. lb = rule['RangeLower']
  236. ub = rule['RangeUpper']
  237. if isinstance(lb, str):
  238. if lb in alias:
  239. lb = alias[lb]
  240. if isinstance(ub, str):
  241. if ub in alias:
  242. ub = alias[ub]
  243. if val == -1:
  244. self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [],
  245. lb, ub, rule['Description']))
  246. elif not self.check_bound(val, lbv, ubv, t):
  247. self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [val],
  248. lb, ub, rule['Description']))
  249. else:
  250. self.passedcnt += 1
  251. self.totalcnt += 1
  252. return
  253. # Single Metric Test
  254. def single_test(self, rule: dict):
  255. """
  256. Validate if the metrics are in the required value range.
  257. eg. lower_bound <= metrics_value <= upper_bound
  258. One metric is counted as one test in this type of test.
  259. One rule may include one or more metrics.
  260. Failure: when the metric value not provided or the value is outside the bounds.
  261. This test updates self.total_cnt.
  262. @param rule: dict with metrics to validate and the value range requirement
  263. """
  264. lbv, ubv, t = self.get_bounds(
  265. rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
  266. metrics = rule['Metrics']
  267. passcnt = 0
  268. totalcnt = 0
  269. failures = dict()
  270. rerun = list()
  271. for m in metrics:
  272. totalcnt += 1
  273. result = self.get_value(m['Name'])
  274. if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t) or m['Name'] in self.skiplist:
  275. passcnt += 1
  276. else:
  277. failures[m['Name']] = result
  278. rerun.append(m['Name'])
  279. if len(rerun) > 0 and len(rerun) < 20:
  280. second_results = dict()
  281. self.second_test(rerun, second_results)
  282. for name, val in second_results.items():
  283. if name not in failures:
  284. continue
  285. if self.check_bound(val, lbv, ubv, t):
  286. passcnt += 1
  287. del failures[name]
  288. else:
  289. failures[name] = [val]
  290. self.results[0][name] = val
  291. self.totalcnt += totalcnt
  292. self.passedcnt += passcnt
  293. if len(failures.keys()) != 0:
  294. self.errlist.extend([TestError([name], self.workloads[self.wlidx], val,
  295. rule['RangeLower'], rule['RangeUpper']) for name, val in failures.items()])
  296. return
  297. def create_report(self):
  298. """
  299. Create final report and write into a JSON file.
  300. """
  301. print(self.errlist)
  302. if self.debug:
  303. allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]}
  304. for i in range(0, len(self.workloads))]
  305. self.json_dump(allres, self.datafname)
  306. def check_rule(self, testtype, metric_list):
  307. """
  308. Check if the rule uses metric(s) that not exist in current platform.
  309. @param metric_list: list of metrics from the rule.
  310. @return: False when find one metric out in Metric file. (This rule should not skipped.)
  311. True when all metrics used in the rule are found in Metric file.
  312. """
  313. if testtype == "RelationshipTest":
  314. for m in metric_list:
  315. if m['Name'] not in self.metrics:
  316. return False
  317. return True
  318. # Start of Collector and Converter
  319. def convert(self, data: list, metricvalues: dict):
  320. """
  321. Convert collected metric data from the -j output to dict of {metric_name:value}.
  322. """
  323. for json_string in data:
  324. try:
  325. result = json.loads(json_string)
  326. if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
  327. name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
  328. else result["metric-unit"]
  329. metricvalues[name.lower()] = float(result["metric-value"])
  330. except ValueError as error:
  331. continue
  332. return
  333. def _run_perf(self, metric, workload: str):
  334. tool = 'perf'
  335. command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
  336. wl = workload.split()
  337. command.extend(wl)
  338. print(" ".join(command))
  339. cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
  340. data = [x+'}' for x in cmd.stderr.split('}\n') if x]
  341. if data[0][0] != '{':
  342. data[0] = data[0][data[0].find('{'):]
  343. return data
  344. def collect_perf(self, workload: str):
  345. """
  346. Collect metric data with "perf stat -M" on given workload with -a and -j.
  347. """
  348. self.results = dict()
  349. print(f"Starting perf collection")
  350. print(f"Long workload: {workload}")
  351. collectlist = dict()
  352. if self.collectlist != "":
  353. collectlist[0] = {x for x in self.collectlist.split(",")}
  354. else:
  355. collectlist[0] = set(list(self.metrics))
  356. # Create metric set for relationship rules
  357. for rule in self.rules:
  358. if rule["TestType"] == "RelationshipTest":
  359. metrics = [m["Name"] for m in rule["Metrics"]]
  360. if not any(m not in collectlist[0] for m in metrics):
  361. collectlist[rule["RuleIndex"]] = [
  362. ",".join(list(set(metrics)))]
  363. for idx, metrics in collectlist.items():
  364. if idx == 0:
  365. wl = "true"
  366. else:
  367. wl = workload
  368. for metric in metrics:
  369. data = self._run_perf(metric, wl)
  370. if idx not in self.results:
  371. self.results[idx] = dict()
  372. self.convert(data, self.results[idx])
  373. return
  374. def second_test(self, collectlist, second_results):
  375. workload = self.workloads[self.wlidx]
  376. for metric in collectlist:
  377. data = self._run_perf(metric, workload)
  378. self.convert(data, second_results)
  379. # End of Collector and Converter
  380. # Start of Rule Generator
  381. def parse_perf_metrics(self):
  382. """
  383. Read and parse perf metric file:
  384. 1) find metrics with '1%' or '100%' as ScaleUnit for Percent check
  385. 2) create metric name list
  386. """
  387. command = ['perf', 'list', '-j', '--details', 'metrics']
  388. cmd = subprocess.run(command, stdout=subprocess.PIPE,
  389. stderr=subprocess.PIPE, encoding='utf-8')
  390. try:
  391. data = json.loads(cmd.stdout)
  392. for m in data:
  393. if 'MetricName' not in m:
  394. print("Warning: no metric name")
  395. continue
  396. if 'Unit' in m and m['Unit'] != self.cputype:
  397. continue
  398. name = m['MetricName'].lower()
  399. self.metrics.add(name)
  400. if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
  401. self.pctgmetrics.add(name.lower())
  402. except ValueError as error:
  403. print(f"Error when parsing metric data")
  404. sys.exit()
  405. return
  406. def remove_unsupported_rules(self, rules):
  407. new_rules = []
  408. for rule in rules:
  409. add_rule = True
  410. for m in rule["Metrics"]:
  411. if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
  412. add_rule = False
  413. break
  414. if add_rule:
  415. new_rules.append(rule)
  416. return new_rules
  417. def create_rules(self):
  418. """
  419. Create full rules which includes:
  420. 1) All the rules from the "relationshi_rules" file
  421. 2) SingleMetric rule for all the 'percent' metrics
  422. Reindex all the rules to avoid repeated RuleIndex
  423. """
  424. data = self.read_json(self.rulefname)
  425. rules = data['RelationshipRules']
  426. self.skiplist = set([name.lower() for name in data['SkipList']])
  427. self.rules = self.remove_unsupported_rules(rules)
  428. pctgrule = {'RuleIndex': 0,
  429. 'TestType': 'SingleMetricTest',
  430. 'RangeLower': '0',
  431. 'RangeUpper': '100',
  432. 'ErrorThreshold': self.tolerance,
  433. 'Description': 'Metrics in percent unit have value with in [0, 100]',
  434. 'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
  435. self.rules.append(pctgrule)
  436. # Re-index all rules to avoid repeated RuleIndex
  437. idx = 1
  438. for r in self.rules:
  439. r['RuleIndex'] = idx
  440. idx += 1
  441. if self.debug:
  442. # TODO: need to test and generate file name correctly
  443. data = {'RelationshipRules': self.rules, 'SupportedMetrics': [
  444. {"MetricName": name} for name in self.metrics]}
  445. self.json_dump(data, self.fullrulefname)
  446. return
  447. # End of Rule Generator
  448. def _storewldata(self, key):
  449. '''
  450. Store all the data of one workload into the corresponding data structure for all workloads.
  451. @param key: key to the dictionaries (index of self.workloads).
  452. '''
  453. self.allresults[key] = self.results
  454. self.alltotalcnt[key] = self.totalcnt
  455. self.allpassedcnt[key] = self.passedcnt
  456. # Initialize data structures before data validation of each workload
  457. def _init_data(self):
  458. testtypes = ['PositiveValueTest',
  459. 'RelationshipTest', 'SingleMetricTest']
  460. self.results = dict()
  461. self.ignoremetrics = set()
  462. self.errlist = list()
  463. self.totalcnt = 0
  464. self.passedcnt = 0
  465. def test(self):
  466. '''
  467. The real entry point of the test framework.
  468. This function loads the validation rule JSON file and Standard Metric file to create rules for
  469. testing and namemap dictionaries.
  470. It also reads in result JSON file for testing.
  471. In the test process, it passes through each rule and launch correct test function bases on the
  472. 'TestType' field of the rule.
  473. The final report is written into a JSON file.
  474. '''
  475. if not self.collectlist:
  476. self.parse_perf_metrics()
  477. if not self.metrics:
  478. print("No metric found for testing")
  479. return 0
  480. self.create_rules()
  481. for i in range(0, len(self.workloads)):
  482. self.wlidx = i
  483. self._init_data()
  484. self.collect_perf(self.workloads[i])
  485. # Run positive value test
  486. self.pos_val_test()
  487. for r in self.rules:
  488. # skip rules that uses metrics not exist in this platform
  489. testtype = r['TestType']
  490. if not self.check_rule(testtype, r['Metrics']):
  491. continue
  492. if testtype == 'RelationshipTest':
  493. self.relationship_test(r)
  494. elif testtype == 'SingleMetricTest':
  495. self.single_test(r)
  496. else:
  497. print("Unsupported Test Type: ", testtype)
  498. print("Workload: ", self.workloads[i])
  499. print("Total Test Count: ", self.totalcnt)
  500. print("Passed Test Count: ", self.passedcnt)
  501. self._storewldata(i)
  502. self.create_report()
  503. return len(self.errlist) > 0
  504. # End of Class Validator
  505. def main() -> None:
  506. parser = argparse.ArgumentParser(
  507. description="Launch metric value validation")
  508. parser.add_argument(
  509. "-rule", help="Base validation rule file", required=True)
  510. parser.add_argument(
  511. "-output_dir", help="Path for validator output file, report file", required=True)
  512. parser.add_argument("-debug", help="Debug run, save intermediate data to files",
  513. action="store_true", default=False)
  514. parser.add_argument(
  515. "-wl", help="Workload to run while data collection", default="true")
  516. parser.add_argument("-m", help="Metric list to validate", default="")
  517. parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
  518. default="cpu")
  519. args = parser.parse_args()
  520. outpath = Path(args.output_dir)
  521. reportf = Path.joinpath(outpath, 'perf_report.json')
  522. fullrule = Path.joinpath(outpath, 'full_rule.json')
  523. datafile = Path.joinpath(outpath, 'perf_data.json')
  524. validator = Validator(args.rule, reportf, debug=args.debug,
  525. datafname=datafile, fullrulefname=fullrule, workload=args.wl,
  526. metrics=args.m, cputype=args.cputype)
  527. ret = validator.test()
  528. return ret
# Script entry point: the value returned by main() is passed to sys.exit().
if __name__ == "__main__":
    import sys
    sys.exit(main())