i915_pmu.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233
  1. /*
  2. * SPDX-License-Identifier: MIT
  3. *
  4. * Copyright © 2017-2018 Intel Corporation
  5. */
  6. #include <linux/pm_runtime.h>
  7. #include <drm/drm_print.h>
  8. #include "gt/intel_engine.h"
  9. #include "gt/intel_engine_pm.h"
  10. #include "gt/intel_engine_regs.h"
  11. #include "gt/intel_engine_user.h"
  12. #include "gt/intel_gt.h"
  13. #include "gt/intel_gt_pm.h"
  14. #include "gt/intel_gt_regs.h"
  15. #include "gt/intel_rc6.h"
  16. #include "gt/intel_rps.h"
  17. #include "i915_drv.h"
  18. #include "i915_pmu.h"
  /* Rate, in ticks per second, of the sampling timer used by polled events. */
  #define FREQUENCY 200
  /* Sampling interval in nanoseconds, never shorter than 10000 ns. */
  #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
  /* Enable-mask bits that correspond to the per-engine samplers. */
  #define ENGINE_SAMPLE_MASK \
  	(BIT(I915_SAMPLE_BUSY) | BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA))
  26. static struct i915_pmu *event_to_pmu(struct perf_event *event)
  27. {
  28. return container_of(event->pmu, struct i915_pmu, base);
  29. }
  30. static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
  31. {
  32. return container_of(pmu, struct drm_i915_private, pmu);
  33. }
  34. static u8 engine_config_sample(u64 config)
  35. {
  36. return config & I915_PMU_SAMPLE_MASK;
  37. }
  38. static u8 engine_event_sample(struct perf_event *event)
  39. {
  40. return engine_config_sample(event->attr.config);
  41. }
  42. static u8 engine_event_class(struct perf_event *event)
  43. {
  44. return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
  45. }
  46. static u8 engine_event_instance(struct perf_event *event)
  47. {
  48. return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
  49. }
  50. static bool is_engine_config(const u64 config)
  51. {
  52. return config < __I915_PMU_OTHER(0);
  53. }
  54. static unsigned int config_gt_id(const u64 config)
  55. {
  56. return config >> __I915_PMU_GT_SHIFT;
  57. }
  58. static u64 config_counter(const u64 config)
  59. {
  60. return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
  61. }
  62. static unsigned int other_bit(const u64 config)
  63. {
  64. unsigned int val;
  65. switch (config_counter(config)) {
  66. case I915_PMU_ACTUAL_FREQUENCY:
  67. val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
  68. break;
  69. case I915_PMU_REQUESTED_FREQUENCY:
  70. val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
  71. break;
  72. case I915_PMU_RC6_RESIDENCY:
  73. val = __I915_PMU_RC6_RESIDENCY_ENABLED;
  74. break;
  75. default:
  76. /*
  77. * Events that do not require sampling, or tracking state
  78. * transitions between enabled and disabled can be ignored.
  79. */
  80. return -1;
  81. }
  82. return I915_ENGINE_SAMPLE_COUNT +
  83. config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
  84. val;
  85. }
  86. static unsigned int config_bit(const u64 config)
  87. {
  88. if (is_engine_config(config))
  89. return engine_config_sample(config);
  90. else
  91. return other_bit(config);
  92. }
  93. static __always_inline u32 config_mask(const u64 config)
  94. {
  95. unsigned int bit = config_bit(config);
  96. if (__builtin_constant_p(bit))
  97. BUILD_BUG_ON(bit >
  98. BITS_PER_TYPE(typeof_member(struct i915_pmu,
  99. enable)) - 1);
  100. else
  101. WARN_ON_ONCE(bit >
  102. BITS_PER_TYPE(typeof_member(struct i915_pmu,
  103. enable)) - 1);
  104. return BIT(bit);
  105. }
  106. static bool is_engine_event(struct perf_event *event)
  107. {
  108. return is_engine_config(event->attr.config);
  109. }
  110. static unsigned int event_bit(struct perf_event *event)
  111. {
  112. return config_bit(event->attr.config);
  113. }
  114. static u32 frequency_enabled_mask(void)
  115. {
  116. unsigned int i;
  117. u32 mask = 0;
  118. for (i = 0; i < I915_PMU_MAX_GT; i++)
  119. mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
  120. config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
  121. return mask;
  122. }
  123. static bool pmu_needs_timer(struct i915_pmu *pmu)
  124. {
  125. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  126. u32 enable;
  127. /*
  128. * Only some counters need the sampling timer.
  129. *
  130. * We start with a bitmask of all currently enabled events.
  131. */
  132. enable = pmu->enable;
  133. /*
  134. * Mask out all the ones which do not need the timer, or in
  135. * other words keep all the ones that could need the timer.
  136. */
  137. enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
  138. /*
  139. * Also there is software busyness tracking available we do not
  140. * need the timer for I915_SAMPLE_BUSY counter.
  141. */
  142. if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
  143. enable &= ~BIT(I915_SAMPLE_BUSY);
  144. /*
  145. * If some bits remain it means we need the sampling timer running.
  146. */
  147. return enable;
  148. }
  149. static u64 __get_rc6(struct intel_gt *gt)
  150. {
  151. struct drm_i915_private *i915 = gt->i915;
  152. u64 val;
  153. val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);
  154. if (HAS_RC6p(i915))
  155. val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);
  156. if (HAS_RC6pp(i915))
  157. val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);
  158. return val;
  159. }
  160. static inline s64 ktime_since_raw(const ktime_t kt)
  161. {
  162. return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
  163. }
  164. static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
  165. {
  166. return pmu->sample[gt_id][sample].cur;
  167. }
  168. static void
  169. store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
  170. {
  171. pmu->sample[gt_id][sample].cur = val;
  172. }
  173. static void
  174. add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
  175. {
  176. pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
  177. }
  178. static u64 get_rc6(struct intel_gt *gt)
  179. {
  180. struct drm_i915_private *i915 = gt->i915;
  181. const unsigned int gt_id = gt->info.id;
  182. struct i915_pmu *pmu = &i915->pmu;
  183. intel_wakeref_t wakeref;
  184. unsigned long flags;
  185. u64 val;
  186. wakeref = intel_gt_pm_get_if_awake(gt);
  187. if (wakeref) {
  188. val = __get_rc6(gt);
  189. intel_gt_pm_put_async(gt, wakeref);
  190. }
  191. spin_lock_irqsave(&pmu->lock, flags);
  192. if (wakeref) {
  193. store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
  194. } else {
  195. /*
  196. * We think we are runtime suspended.
  197. *
  198. * Report the delta from when the device was suspended to now,
  199. * on top of the last known real value, as the approximated RC6
  200. * counter value.
  201. */
  202. val = ktime_since_raw(pmu->sleep_last[gt_id]);
  203. val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
  204. }
  205. if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
  206. val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
  207. else
  208. store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
  209. spin_unlock_irqrestore(&pmu->lock, flags);
  210. return val;
  211. }
  212. static void init_rc6(struct i915_pmu *pmu)
  213. {
  214. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  215. struct intel_gt *gt;
  216. unsigned int i;
  217. for_each_gt(gt, i915, i) {
  218. intel_wakeref_t wakeref;
  219. with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
  220. u64 val = __get_rc6(gt);
  221. store_sample(pmu, i, __I915_SAMPLE_RC6, val);
  222. store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
  223. val);
  224. pmu->sleep_last[i] = ktime_get_raw();
  225. }
  226. }
  227. }
  228. static void park_rc6(struct intel_gt *gt)
  229. {
  230. struct i915_pmu *pmu = &gt->i915->pmu;
  231. store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
  232. pmu->sleep_last[gt->info.id] = ktime_get_raw();
  233. }
  234. static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
  235. {
  236. if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
  237. pmu->timer_enabled = true;
  238. pmu->timer_last = ktime_get();
  239. hrtimer_start_range_ns(&pmu->timer,
  240. ns_to_ktime(PERIOD), 0,
  241. HRTIMER_MODE_REL_PINNED);
  242. }
  243. }
  244. void i915_pmu_gt_parked(struct intel_gt *gt)
  245. {
  246. struct i915_pmu *pmu = &gt->i915->pmu;
  247. if (!pmu->registered)
  248. return;
  249. spin_lock_irq(&pmu->lock);
  250. park_rc6(gt);
  251. /*
  252. * Signal sampling timer to stop if only engine events are enabled and
  253. * GPU went idle.
  254. */
  255. pmu->unparked &= ~BIT(gt->info.id);
  256. if (pmu->unparked == 0)
  257. pmu->timer_enabled = false;
  258. spin_unlock_irq(&pmu->lock);
  259. }
  260. void i915_pmu_gt_unparked(struct intel_gt *gt)
  261. {
  262. struct i915_pmu *pmu = &gt->i915->pmu;
  263. if (!pmu->registered)
  264. return;
  265. spin_lock_irq(&pmu->lock);
  266. /*
  267. * Re-enable sampling timer when GPU goes active.
  268. */
  269. if (pmu->unparked == 0)
  270. __i915_pmu_maybe_start_timer(pmu);
  271. pmu->unparked |= BIT(gt->info.id);
  272. spin_unlock_irq(&pmu->lock);
  273. }
  274. static void
  275. add_sample(struct i915_pmu_sample *sample, u32 val)
  276. {
  277. sample->cur += val;
  278. }
  279. static bool exclusive_mmio_access(const struct drm_i915_private *i915)
  280. {
  281. /*
  282. * We have to avoid concurrent mmio cache line access on gen7 or
  283. * risk a machine hang. For a fun history lesson dig out the old
  284. * userspace intel_gpu_top and run it on Ivybridge or Haswell!
  285. */
  286. return GRAPHICS_VER(i915) == 7;
  287. }
  288. static void gen3_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
  289. {
  290. struct intel_engine_pmu *pmu = &engine->pmu;
  291. bool busy;
  292. u32 val;
  293. val = ENGINE_READ_FW(engine, RING_CTL);
  294. if (val == 0) /* powerwell off => engine idle */
  295. return;
  296. if (val & RING_WAIT)
  297. add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
  298. if (val & RING_WAIT_SEMAPHORE)
  299. add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
  300. /* No need to sample when busy stats are supported. */
  301. if (intel_engine_supports_stats(engine))
  302. return;
  303. /*
  304. * While waiting on a semaphore or event, MI_MODE reports the
  305. * ring as idle. However, previously using the seqno, and with
  306. * execlists sampling, we account for the ring waiting as the
  307. * engine being busy. Therefore, we record the sample as being
  308. * busy if either waiting or !idle.
  309. */
  310. busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
  311. if (!busy) {
  312. val = ENGINE_READ_FW(engine, RING_MI_MODE);
  313. busy = !(val & MODE_IDLE);
  314. }
  315. if (busy)
  316. add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
  317. }
  318. static void gen2_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
  319. {
  320. struct intel_engine_pmu *pmu = &engine->pmu;
  321. u32 tail, head, acthd;
  322. tail = ENGINE_READ_FW(engine, RING_TAIL);
  323. head = ENGINE_READ_FW(engine, RING_HEAD);
  324. acthd = ENGINE_READ_FW(engine, ACTHD);
  325. if (head & HEAD_WAIT_I8XX)
  326. add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
  327. if (head & HEAD_WAIT_I8XX || head != acthd ||
  328. (head & HEAD_ADDR) != (tail & TAIL_ADDR))
  329. add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
  330. }
  331. static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
  332. {
  333. if (GRAPHICS_VER(engine->i915) >= 3)
  334. gen3_engine_sample(engine, period_ns);
  335. else
  336. gen2_engine_sample(engine, period_ns);
  337. }
  338. static void
  339. engines_sample(struct intel_gt *gt, unsigned int period_ns)
  340. {
  341. struct drm_i915_private *i915 = gt->i915;
  342. struct intel_engine_cs *engine;
  343. enum intel_engine_id id;
  344. unsigned long flags;
  345. if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
  346. return;
  347. if (!intel_gt_pm_is_awake(gt))
  348. return;
  349. for_each_engine(engine, gt, id) {
  350. if (!engine->pmu.enable)
  351. continue;
  352. if (!intel_engine_pm_get_if_awake(engine))
  353. continue;
  354. if (exclusive_mmio_access(i915)) {
  355. spin_lock_irqsave(&engine->uncore->lock, flags);
  356. engine_sample(engine, period_ns);
  357. spin_unlock_irqrestore(&engine->uncore->lock, flags);
  358. } else {
  359. engine_sample(engine, period_ns);
  360. }
  361. intel_engine_pm_put_async(engine);
  362. }
  363. }
  364. static bool
  365. frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
  366. {
  367. return pmu->enable &
  368. (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
  369. config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
  370. }
  371. static void
  372. frequency_sample(struct intel_gt *gt, unsigned int period_ns)
  373. {
  374. struct drm_i915_private *i915 = gt->i915;
  375. const unsigned int gt_id = gt->info.id;
  376. struct i915_pmu *pmu = &i915->pmu;
  377. struct intel_rps *rps = &gt->rps;
  378. intel_wakeref_t wakeref;
  379. if (!frequency_sampling_enabled(pmu, gt_id))
  380. return;
  381. /* Report 0/0 (actual/requested) frequency while parked. */
  382. wakeref = intel_gt_pm_get_if_awake(gt);
  383. if (!wakeref)
  384. return;
  385. if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
  386. u32 val;
  387. /*
  388. * We take a quick peek here without using forcewake
  389. * so that we don't perturb the system under observation
  390. * (forcewake => !rc6 => increased power use). We expect
  391. * that if the read fails because it is outside of the
  392. * mmio power well, then it will return 0 -- in which
  393. * case we assume the system is running at the intended
  394. * frequency. Fortunately, the read should rarely fail!
  395. */
  396. val = intel_rps_read_actual_frequency_fw(rps);
  397. if (!val)
  398. val = intel_gpu_freq(rps, rps->cur_freq);
  399. add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
  400. val, period_ns / 1000);
  401. }
  402. if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
  403. add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
  404. intel_rps_get_requested_frequency(rps),
  405. period_ns / 1000);
  406. }
  407. intel_gt_pm_put_async(gt, wakeref);
  408. }
  409. static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
  410. {
  411. struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
  412. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  413. unsigned int period_ns;
  414. struct intel_gt *gt;
  415. unsigned int i;
  416. ktime_t now;
  417. if (!READ_ONCE(pmu->timer_enabled))
  418. return HRTIMER_NORESTART;
  419. now = ktime_get();
  420. period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
  421. pmu->timer_last = now;
  422. /*
  423. * Strictly speaking the passed in period may not be 100% accurate for
  424. * all internal calculation, since some amount of time can be spent on
  425. * grabbing the forcewake. However the potential error from timer call-
  426. * back delay greatly dominates this so we keep it simple.
  427. */
  428. for_each_gt(gt, i915, i) {
  429. if (!(pmu->unparked & BIT(i)))
  430. continue;
  431. engines_sample(gt, period_ns);
  432. frequency_sample(gt, period_ns);
  433. }
  434. hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
  435. return HRTIMER_RESTART;
  436. }
  437. static void i915_pmu_event_destroy(struct perf_event *event)
  438. {
  439. struct i915_pmu *pmu = event_to_pmu(event);
  440. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  441. drm_WARN_ON(&i915->drm, event->parent);
  442. drm_dev_put(&i915->drm);
  443. }
  444. static int
  445. engine_event_status(struct intel_engine_cs *engine,
  446. enum drm_i915_pmu_engine_sample sample)
  447. {
  448. switch (sample) {
  449. case I915_SAMPLE_BUSY:
  450. case I915_SAMPLE_WAIT:
  451. break;
  452. case I915_SAMPLE_SEMA:
  453. if (GRAPHICS_VER(engine->i915) < 6)
  454. return -ENODEV;
  455. break;
  456. default:
  457. return -ENOENT;
  458. }
  459. return 0;
  460. }
  461. static int
  462. config_status(struct drm_i915_private *i915, u64 config)
  463. {
  464. struct intel_gt *gt = to_gt(i915);
  465. unsigned int gt_id = config_gt_id(config);
  466. unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;
  467. if (gt_id > max_gt_id)
  468. return -ENOENT;
  469. switch (config_counter(config)) {
  470. case I915_PMU_ACTUAL_FREQUENCY:
  471. if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
  472. /* Requires a mutex for sampling! */
  473. return -ENODEV;
  474. fallthrough;
  475. case I915_PMU_REQUESTED_FREQUENCY:
  476. if (GRAPHICS_VER(i915) < 6)
  477. return -ENODEV;
  478. break;
  479. case I915_PMU_INTERRUPTS:
  480. if (gt_id)
  481. return -ENOENT;
  482. break;
  483. case I915_PMU_RC6_RESIDENCY:
  484. if (!gt->rc6.supported)
  485. return -ENODEV;
  486. break;
  487. case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
  488. break;
  489. default:
  490. return -ENOENT;
  491. }
  492. return 0;
  493. }
  494. static int engine_event_init(struct perf_event *event)
  495. {
  496. struct i915_pmu *pmu = event_to_pmu(event);
  497. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  498. struct intel_engine_cs *engine;
  499. engine = intel_engine_lookup_user(i915, engine_event_class(event),
  500. engine_event_instance(event));
  501. if (!engine)
  502. return -ENODEV;
  503. return engine_event_status(engine, engine_event_sample(event));
  504. }
  505. static int i915_pmu_event_init(struct perf_event *event)
  506. {
  507. struct i915_pmu *pmu = event_to_pmu(event);
  508. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  509. int ret;
  510. if (!pmu->registered)
  511. return -ENODEV;
  512. if (event->attr.type != event->pmu->type)
  513. return -ENOENT;
  514. /* unsupported modes and filters */
  515. if (event->attr.sample_period) /* no sampling */
  516. return -EINVAL;
  517. if (has_branch_stack(event))
  518. return -EOPNOTSUPP;
  519. if (event->cpu < 0)
  520. return -EINVAL;
  521. if (is_engine_event(event))
  522. ret = engine_event_init(event);
  523. else
  524. ret = config_status(i915, event->attr.config);
  525. if (ret)
  526. return ret;
  527. if (!event->parent) {
  528. drm_dev_get(&i915->drm);
  529. event->destroy = i915_pmu_event_destroy;
  530. }
  531. return 0;
  532. }
  533. static u64 __i915_pmu_event_read(struct perf_event *event)
  534. {
  535. struct i915_pmu *pmu = event_to_pmu(event);
  536. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  537. u64 val = 0;
  538. if (is_engine_event(event)) {
  539. u8 sample = engine_event_sample(event);
  540. struct intel_engine_cs *engine;
  541. engine = intel_engine_lookup_user(i915,
  542. engine_event_class(event),
  543. engine_event_instance(event));
  544. if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
  545. /* Do nothing */
  546. } else if (sample == I915_SAMPLE_BUSY &&
  547. intel_engine_supports_stats(engine)) {
  548. ktime_t unused;
  549. val = ktime_to_ns(intel_engine_get_busy_time(engine,
  550. &unused));
  551. } else {
  552. val = engine->pmu.sample[sample].cur;
  553. }
  554. } else {
  555. const unsigned int gt_id = config_gt_id(event->attr.config);
  556. const u64 config = config_counter(event->attr.config);
  557. switch (config) {
  558. case I915_PMU_ACTUAL_FREQUENCY:
  559. val =
  560. div_u64(read_sample(pmu, gt_id,
  561. __I915_SAMPLE_FREQ_ACT),
  562. USEC_PER_SEC /* to MHz */);
  563. break;
  564. case I915_PMU_REQUESTED_FREQUENCY:
  565. val =
  566. div_u64(read_sample(pmu, gt_id,
  567. __I915_SAMPLE_FREQ_REQ),
  568. USEC_PER_SEC /* to MHz */);
  569. break;
  570. case I915_PMU_INTERRUPTS:
  571. val = READ_ONCE(pmu->irq_count);
  572. break;
  573. case I915_PMU_RC6_RESIDENCY:
  574. val = get_rc6(i915->gt[gt_id]);
  575. break;
  576. case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
  577. val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
  578. break;
  579. }
  580. }
  581. return val;
  582. }
  583. static void i915_pmu_event_read(struct perf_event *event)
  584. {
  585. struct i915_pmu *pmu = event_to_pmu(event);
  586. struct hw_perf_event *hwc = &event->hw;
  587. u64 prev, new;
  588. if (!pmu->registered) {
  589. event->hw.state = PERF_HES_STOPPED;
  590. return;
  591. }
  592. prev = local64_read(&hwc->prev_count);
  593. do {
  594. new = __i915_pmu_event_read(event);
  595. } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));
  596. local64_add(new - prev, &event->count);
  597. }
  598. static void i915_pmu_enable(struct perf_event *event)
  599. {
  600. struct i915_pmu *pmu = event_to_pmu(event);
  601. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  602. const unsigned int bit = event_bit(event);
  603. unsigned long flags;
  604. if (bit == -1)
  605. goto update;
  606. spin_lock_irqsave(&pmu->lock, flags);
  607. /*
  608. * Update the bitmask of enabled events and increment
  609. * the event reference counter.
  610. */
  611. BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
  612. GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
  613. GEM_BUG_ON(pmu->enable_count[bit] == ~0);
  614. pmu->enable |= BIT(bit);
  615. pmu->enable_count[bit]++;
  616. /*
  617. * Start the sampling timer if needed and not already enabled.
  618. */
  619. __i915_pmu_maybe_start_timer(pmu);
  620. /*
  621. * For per-engine events the bitmask and reference counting
  622. * is stored per engine.
  623. */
  624. if (is_engine_event(event)) {
  625. u8 sample = engine_event_sample(event);
  626. struct intel_engine_cs *engine;
  627. engine = intel_engine_lookup_user(i915,
  628. engine_event_class(event),
  629. engine_event_instance(event));
  630. BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
  631. I915_ENGINE_SAMPLE_COUNT);
  632. BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
  633. I915_ENGINE_SAMPLE_COUNT);
  634. GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
  635. GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
  636. GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
  637. engine->pmu.enable |= BIT(sample);
  638. engine->pmu.enable_count[sample]++;
  639. }
  640. spin_unlock_irqrestore(&pmu->lock, flags);
  641. update:
  642. /*
  643. * Store the current counter value so we can report the correct delta
  644. * for all listeners. Even when the event was already enabled and has
  645. * an existing non-zero value.
  646. */
  647. local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
  648. }
  649. static void i915_pmu_disable(struct perf_event *event)
  650. {
  651. struct i915_pmu *pmu = event_to_pmu(event);
  652. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  653. const unsigned int bit = event_bit(event);
  654. unsigned long flags;
  655. if (bit == -1)
  656. return;
  657. spin_lock_irqsave(&pmu->lock, flags);
  658. if (is_engine_event(event)) {
  659. u8 sample = engine_event_sample(event);
  660. struct intel_engine_cs *engine;
  661. engine = intel_engine_lookup_user(i915,
  662. engine_event_class(event),
  663. engine_event_instance(event));
  664. GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
  665. GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
  666. GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
  667. /*
  668. * Decrement the reference count and clear the enabled
  669. * bitmask when the last listener on an event goes away.
  670. */
  671. if (--engine->pmu.enable_count[sample] == 0)
  672. engine->pmu.enable &= ~BIT(sample);
  673. }
  674. GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
  675. GEM_BUG_ON(pmu->enable_count[bit] == 0);
  676. /*
  677. * Decrement the reference count and clear the enabled
  678. * bitmask when the last listener on an event goes away.
  679. */
  680. if (--pmu->enable_count[bit] == 0) {
  681. pmu->enable &= ~BIT(bit);
  682. pmu->timer_enabled &= pmu_needs_timer(pmu);
  683. }
  684. spin_unlock_irqrestore(&pmu->lock, flags);
  685. }
  686. static void i915_pmu_event_start(struct perf_event *event, int flags)
  687. {
  688. struct i915_pmu *pmu = event_to_pmu(event);
  689. if (!pmu->registered)
  690. return;
  691. i915_pmu_enable(event);
  692. event->hw.state = 0;
  693. }
  694. static void i915_pmu_event_stop(struct perf_event *event, int flags)
  695. {
  696. struct i915_pmu *pmu = event_to_pmu(event);
  697. if (!pmu->registered)
  698. goto out;
  699. if (flags & PERF_EF_UPDATE)
  700. i915_pmu_event_read(event);
  701. i915_pmu_disable(event);
  702. out:
  703. event->hw.state = PERF_HES_STOPPED;
  704. }
  705. static int i915_pmu_event_add(struct perf_event *event, int flags)
  706. {
  707. struct i915_pmu *pmu = event_to_pmu(event);
  708. if (!pmu->registered)
  709. return -ENODEV;
  710. if (flags & PERF_EF_START)
  711. i915_pmu_event_start(event, flags);
  712. return 0;
  713. }
  714. static void i915_pmu_event_del(struct perf_event *event, int flags)
  715. {
  716. i915_pmu_event_stop(event, PERF_EF_UPDATE);
  717. }
  718. struct i915_str_attribute {
  719. struct device_attribute attr;
  720. const char *str;
  721. };
  722. static ssize_t i915_pmu_format_show(struct device *dev,
  723. struct device_attribute *attr, char *buf)
  724. {
  725. struct i915_str_attribute *eattr;
  726. eattr = container_of(attr, struct i915_str_attribute, attr);
  727. return sysfs_emit(buf, "%s\n", eattr->str);
  728. }
  /*
   * Build an anonymous read-only (0444) i915_str_attribute named @_name that
   * shows @_config, and evaluate to a pointer to its struct attribute.
   */
  #define I915_PMU_FORMAT_ATTR(_name, _config) \
  	(&((struct i915_str_attribute[]) { \
  		{ \
  			.attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
  			.str = _config, \
  		} \
  	})[0].attr.attr)
  734. static struct attribute *i915_pmu_format_attrs[] = {
  735. I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
  736. NULL,
  737. };
  738. static const struct attribute_group i915_pmu_format_attr_group = {
  739. .name = "format",
  740. .attrs = i915_pmu_format_attrs,
  741. };
  742. struct i915_ext_attribute {
  743. struct device_attribute attr;
  744. unsigned long val;
  745. };
  746. static ssize_t i915_pmu_event_show(struct device *dev,
  747. struct device_attribute *attr, char *buf)
  748. {
  749. struct i915_ext_attribute *eattr;
  750. eattr = container_of(attr, struct i915_ext_attribute, attr);
  751. return sysfs_emit(buf, "config=0x%lx\n", eattr->val);
  752. }
  /* Initialiser for a non-engine event table entry with .global = false. */
  #define __event(__counter, __name, __unit) \
  	{ \
  		.counter = (__counter), \
  		.name = (__name), \
  		.unit = (__unit), \
  		.global = false, \
  	}

  /* Same as __event() but with .global = true. */
  #define __global_event(__counter, __name, __unit) \
  	{ \
  		.counter = (__counter), \
  		.name = (__name), \
  		.unit = (__unit), \
  		.global = true, \
  	}

  /* Initialiser for an engine event table entry (sample type and name). */
  #define __engine_event(__sample, __name) \
  	{ \
  		.sample = (__sample), \
  		.name = (__name), \
  	}
  772. static struct i915_ext_attribute *
  773. add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
  774. {
  775. sysfs_attr_init(&attr->attr.attr);
  776. attr->attr.attr.name = name;
  777. attr->attr.attr.mode = 0444;
  778. attr->attr.show = i915_pmu_event_show;
  779. attr->val = config;
  780. return ++attr;
  781. }
  782. static struct perf_pmu_events_attr *
  783. add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
  784. const char *str)
  785. {
  786. sysfs_attr_init(&attr->attr.attr);
  787. attr->attr.attr.name = name;
  788. attr->attr.attr.mode = 0444;
  789. attr->attr.show = perf_event_sysfs_show;
  790. attr->event_str = str;
  791. return ++attr;
  792. }
  793. static struct attribute **
  794. create_event_attributes(struct i915_pmu *pmu)
  795. {
  796. struct drm_i915_private *i915 = pmu_to_i915(pmu);
  797. static const struct {
  798. unsigned int counter;
  799. const char *name;
  800. const char *unit;
  801. bool global;
  802. } events[] = {
  803. __event(0, "actual-frequency", "M"),
  804. __event(1, "requested-frequency", "M"),
  805. __global_event(2, "interrupts", NULL),
  806. __event(3, "rc6-residency", "ns"),
  807. __event(4, "software-gt-awake-time", "ns"),
  808. };
  809. static const struct {
  810. enum drm_i915_pmu_engine_sample sample;
  811. char *name;
  812. } engine_events[] = {
  813. __engine_event(I915_SAMPLE_BUSY, "busy"),
  814. __engine_event(I915_SAMPLE_SEMA, "sema"),
  815. __engine_event(I915_SAMPLE_WAIT, "wait"),
  816. };
  817. unsigned int count = 0;
  818. struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
  819. struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
  820. struct attribute **attr = NULL, **attr_iter;
  821. struct intel_engine_cs *engine;
  822. struct intel_gt *gt;
  823. unsigned int i, j;
  824. /* Count how many counters we will be exposing. */
  825. for_each_gt(gt, i915, j) {
  826. for (i = 0; i < ARRAY_SIZE(events); i++) {
  827. u64 config = ___I915_PMU_OTHER(j, events[i].counter);
  828. if (!config_status(i915, config))
  829. count++;
  830. }
  831. }
  832. for_each_uabi_engine(engine, i915) {
  833. for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
  834. if (!engine_event_status(engine,
  835. engine_events[i].sample))
  836. count++;
  837. }
  838. }
  839. /* Allocate attribute objects and table. */
  840. i915_attr = kzalloc_objs(*i915_attr, count);
  841. if (!i915_attr)
  842. goto err_alloc;
  843. pmu_attr = kzalloc_objs(*pmu_attr, count);
  844. if (!pmu_attr)
  845. goto err_alloc;
  846. /* Max one pointer of each attribute type plus a termination entry. */
  847. attr = kzalloc_objs(*attr, count * 2 + 1);
  848. if (!attr)
  849. goto err_alloc;
  850. i915_iter = i915_attr;
  851. pmu_iter = pmu_attr;
  852. attr_iter = attr;
  853. /* Initialize supported non-engine counters. */
  854. for_each_gt(gt, i915, j) {
  855. for (i = 0; i < ARRAY_SIZE(events); i++) {
  856. u64 config = ___I915_PMU_OTHER(j, events[i].counter);
  857. char *str;
  858. if (config_status(i915, config))
  859. continue;
  860. if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
  861. str = kstrdup(events[i].name, GFP_KERNEL);
  862. else
  863. str = kasprintf(GFP_KERNEL, "%s-gt%u",
  864. events[i].name, j);
  865. if (!str)
  866. goto err;
  867. *attr_iter++ = &i915_iter->attr.attr;
  868. i915_iter = add_i915_attr(i915_iter, str, config);
  869. if (events[i].unit) {
  870. if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
  871. str = kasprintf(GFP_KERNEL, "%s.unit",
  872. events[i].name);
  873. else
  874. str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
  875. events[i].name, j);
  876. if (!str)
  877. goto err;
  878. *attr_iter++ = &pmu_iter->attr.attr;
  879. pmu_iter = add_pmu_attr(pmu_iter, str,
  880. events[i].unit);
  881. }
  882. }
  883. }
  884. /* Initialize supported engine counters. */
  885. for_each_uabi_engine(engine, i915) {
  886. for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
  887. char *str;
  888. if (engine_event_status(engine,
  889. engine_events[i].sample))
  890. continue;
  891. str = kasprintf(GFP_KERNEL, "%s-%s",
  892. engine->name, engine_events[i].name);
  893. if (!str)
  894. goto err;
  895. *attr_iter++ = &i915_iter->attr.attr;
  896. i915_iter =
  897. add_i915_attr(i915_iter, str,
  898. __I915_PMU_ENGINE(engine->uabi_class,
  899. engine->uabi_instance,
  900. engine_events[i].sample));
  901. str = kasprintf(GFP_KERNEL, "%s-%s.unit",
  902. engine->name, engine_events[i].name);
  903. if (!str)
  904. goto err;
  905. *attr_iter++ = &pmu_iter->attr.attr;
  906. pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
  907. }
  908. }
  909. pmu->i915_attr = i915_attr;
  910. pmu->pmu_attr = pmu_attr;
  911. return attr;
  912. err:;
  913. for (attr_iter = attr; *attr_iter; attr_iter++)
  914. kfree((*attr_iter)->name);
  915. err_alloc:
  916. kfree(attr);
  917. kfree(i915_attr);
  918. kfree(pmu_attr);
  919. return NULL;
  920. }
  921. static void free_event_attributes(struct i915_pmu *pmu)
  922. {
  923. struct attribute **attr_iter = pmu->events_attr_group.attrs;
  924. for (; *attr_iter; attr_iter++)
  925. kfree((*attr_iter)->name);
  926. kfree(pmu->events_attr_group.attrs);
  927. kfree(pmu->i915_attr);
  928. kfree(pmu->pmu_attr);
  929. pmu->events_attr_group.attrs = NULL;
  930. pmu->i915_attr = NULL;
  931. pmu->pmu_attr = NULL;
  932. }
  933. void i915_pmu_register(struct drm_i915_private *i915)
  934. {
  935. struct i915_pmu *pmu = &i915->pmu;
  936. const struct attribute_group *attr_groups[] = {
  937. &i915_pmu_format_attr_group,
  938. &pmu->events_attr_group,
  939. NULL
  940. };
  941. int ret = -ENOMEM;
  942. spin_lock_init(&pmu->lock);
  943. hrtimer_setup(&pmu->timer, i915_sample, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  944. init_rc6(pmu);
  945. if (IS_DGFX(i915)) {
  946. pmu->name = kasprintf(GFP_KERNEL,
  947. "i915_%s",
  948. dev_name(i915->drm.dev));
  949. if (pmu->name) {
  950. /* tools/perf reserves colons as special. */
  951. strreplace((char *)pmu->name, ':', '_');
  952. }
  953. } else {
  954. pmu->name = "i915";
  955. }
  956. if (!pmu->name)
  957. goto err;
  958. pmu->events_attr_group.name = "events";
  959. pmu->events_attr_group.attrs = create_event_attributes(pmu);
  960. if (!pmu->events_attr_group.attrs)
  961. goto err_name;
  962. pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
  963. GFP_KERNEL);
  964. if (!pmu->base.attr_groups)
  965. goto err_attr;
  966. pmu->base.module = THIS_MODULE;
  967. pmu->base.task_ctx_nr = perf_invalid_context;
  968. pmu->base.scope = PERF_PMU_SCOPE_SYS_WIDE;
  969. pmu->base.event_init = i915_pmu_event_init;
  970. pmu->base.add = i915_pmu_event_add;
  971. pmu->base.del = i915_pmu_event_del;
  972. pmu->base.start = i915_pmu_event_start;
  973. pmu->base.stop = i915_pmu_event_stop;
  974. pmu->base.read = i915_pmu_event_read;
  975. ret = perf_pmu_register(&pmu->base, pmu->name, -1);
  976. if (ret)
  977. goto err_groups;
  978. pmu->registered = true;
  979. return;
  980. err_groups:
  981. kfree(pmu->base.attr_groups);
  982. err_attr:
  983. free_event_attributes(pmu);
  984. err_name:
  985. if (IS_DGFX(i915))
  986. kfree(pmu->name);
  987. err:
  988. drm_notice(&i915->drm, "Failed to register PMU!\n");
  989. }
  990. void i915_pmu_unregister(struct drm_i915_private *i915)
  991. {
  992. struct i915_pmu *pmu = &i915->pmu;
  993. if (!pmu->registered)
  994. return;
  995. /* Disconnect the PMU callbacks */
  996. pmu->registered = false;
  997. hrtimer_cancel(&pmu->timer);
  998. perf_pmu_unregister(&pmu->base);
  999. kfree(pmu->base.attr_groups);
  1000. if (IS_DGFX(i915))
  1001. kfree(pmu->name);
  1002. free_event_attributes(pmu);
  1003. }