hw_breakpoint.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * Copyright (C) 2007 Alan Stern
  4. * Copyright (C) IBM Corporation, 2009
  5. * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
  6. *
  7. * Thanks to Ingo Molnar for his many suggestions.
  8. *
  9. * Authors: Alan Stern <stern@rowland.harvard.edu>
  10. * K.Prasad <prasad@linux.vnet.ibm.com>
  11. * Frederic Weisbecker <fweisbec@gmail.com>
  12. */
  13. /*
  14. * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
  15. * using the CPU's debug registers.
  16. * This file contains the arch-independent routines.
  17. */
  18. #include <linux/hw_breakpoint.h>
  19. #include <linux/atomic.h>
  20. #include <linux/bug.h>
  21. #include <linux/cpu.h>
  22. #include <linux/export.h>
  23. #include <linux/init.h>
  24. #include <linux/irqflags.h>
  25. #include <linux/kdebug.h>
  26. #include <linux/kernel.h>
  27. #include <linux/mutex.h>
  28. #include <linux/notifier.h>
  29. #include <linux/percpu-rwsem.h>
  30. #include <linux/percpu.h>
  31. #include <linux/rhashtable.h>
  32. #include <linux/sched.h>
  33. #include <linux/slab.h>
/*
 * Datastructure to track the total uses of N slots across tasks or CPUs;
 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
 *
 * The storage of ::count depends on whether the architecture defines
 * hw_breakpoint_slots as a preprocessor macro.
 */
struct bp_slots_histogram {
#ifdef hw_breakpoint_slots
	/* Slot count is a compile-time constant: embed the array. */
	atomic_t count[hw_breakpoint_slots(0)];
#else
	/* Slot count is only known at boot: see bp_slots_histogram_alloc(). */
	atomic_t *count;
#endif
};
/*
 * Per-CPU constraints data.
 *
 * One instance exists per CPU and per breakpoint type (see the bp_cpuinfo
 * per-CPU array and get_bp_info()).
 */
struct bp_cpuinfo {
	/* Number of pinned CPU breakpoints in a CPU. */
	unsigned int cpu_pinned;
	/* Histogram of pinned task breakpoints in a CPU. */
	struct bp_slots_histogram tsk_pinned;
};
  54. static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
  55. static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
  56. {
  57. return per_cpu_ptr(bp_cpuinfo + type, cpu);
  58. }
/* Number of pinned CPU breakpoints globally. */
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
/* Number of pinned CPU-independent task breakpoints. */
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static struct rhltable task_bps_ht;
/*
 * Entries are hw_perf_event objects, linked through hw_perf_event::bp_list
 * and keyed by hw_perf_event::target.
 */
static const struct rhashtable_params task_bps_ht_params = {
	.head_offset = offsetof(struct hw_perf_event, bp_list),
	.key_offset = offsetof(struct hw_perf_event, target),
	.key_len = sizeof_field(struct hw_perf_event, target),
	.automatic_shrinking = true,
};

/*
 * Set by init_hw_breakpoint() once the constraints tables are set up;
 * __reserve_bp_slot() and hw_breakpoint_is_used() check it before touching
 * the tables.
 */
static bool constraints_initialized __ro_after_init;
/*
 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
 *
 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
 *
 *  2. Holding a write-lock is required for computations that require a
 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
 *
 *  3. In all other cases, non-atomic accesses require the appropriately held
 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
 */
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
  85. /*
  86. * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
  87. * rhltable synchronizes concurrent insertions/deletions, independent tasks may
  88. * insert/delete concurrently; therefore, a mutex per task is sufficient.
  89. *
  90. * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
  91. * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
  92. * that hw_breakpoint may contend with per-task perf event list management. The
  93. * assumption is that perf usecases involving hw_breakpoints are very unlikely
  94. * to result in unnecessary contention.
  95. */
  96. static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
  97. {
  98. struct task_struct *tsk = bp->hw.target;
  99. return tsk ? &tsk->perf_event_mutex : NULL;
  100. }
  101. static struct mutex *bp_constraints_lock(struct perf_event *bp)
  102. {
  103. struct mutex *tsk_mtx = get_task_bps_mutex(bp);
  104. if (tsk_mtx) {
  105. /*
  106. * Fully analogous to the perf_try_init_event() nesting
  107. * argument in the comment near perf_event_ctx_lock_nested();
  108. * this child->perf_event_mutex cannot ever deadlock against
  109. * the parent->perf_event_mutex usage from
  110. * perf_event_task_{en,dis}able().
  111. *
  112. * Specifically, inherited events will never occur on
  113. * ->perf_event_list.
  114. */
  115. mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
  116. percpu_down_read(&bp_cpuinfo_sem);
  117. } else {
  118. percpu_down_write(&bp_cpuinfo_sem);
  119. }
  120. return tsk_mtx;
  121. }
  122. static void bp_constraints_unlock(struct mutex *tsk_mtx)
  123. {
  124. if (tsk_mtx) {
  125. percpu_up_read(&bp_cpuinfo_sem);
  126. mutex_unlock(tsk_mtx);
  127. } else {
  128. percpu_up_write(&bp_cpuinfo_sem);
  129. }
  130. }
  131. static bool bp_constraints_is_locked(struct perf_event *bp)
  132. {
  133. struct mutex *tsk_mtx = get_task_bps_mutex(bp);
  134. return percpu_is_write_locked(&bp_cpuinfo_sem) ||
  135. (tsk_mtx ? mutex_is_locked(tsk_mtx) :
  136. percpu_is_read_locked(&bp_cpuinfo_sem));
  137. }
  138. static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
  139. {
  140. struct mutex *tsk_mtx = get_task_bps_mutex(bp);
  141. if (tsk_mtx)
  142. lockdep_assert_held(tsk_mtx);
  143. lockdep_assert_held(&bp_cpuinfo_sem);
  144. }
  145. #ifdef hw_breakpoint_slots
  146. /*
  147. * Number of breakpoint slots is constant, and the same for all types.
  148. */
  149. static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
  150. static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); }
  151. static inline int init_breakpoint_slots(void) { return 0; }
  152. #else
  153. /*
  154. * Dynamic number of breakpoint slots.
  155. */
  156. static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
  157. static inline int hw_breakpoint_slots_cached(int type)
  158. {
  159. return __nr_bp_slots[type];
  160. }
  161. static __init bool
  162. bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
  163. {
  164. hist->count = kzalloc_objs(*hist->count,
  165. hw_breakpoint_slots_cached(type));
  166. return hist->count;
  167. }
  168. static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
  169. {
  170. kfree(hist->count);
  171. }
  172. static __init int init_breakpoint_slots(void)
  173. {
  174. int i, cpu, err_cpu;
  175. for (i = 0; i < TYPE_MAX; i++)
  176. __nr_bp_slots[i] = hw_breakpoint_slots(i);
  177. for_each_possible_cpu(cpu) {
  178. for (i = 0; i < TYPE_MAX; i++) {
  179. struct bp_cpuinfo *info = get_bp_info(cpu, i);
  180. if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
  181. goto err;
  182. }
  183. }
  184. for (i = 0; i < TYPE_MAX; i++) {
  185. if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
  186. goto err;
  187. if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
  188. goto err;
  189. }
  190. return 0;
  191. err:
  192. for_each_possible_cpu(err_cpu) {
  193. for (i = 0; i < TYPE_MAX; i++)
  194. bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
  195. if (err_cpu == cpu)
  196. break;
  197. }
  198. for (i = 0; i < TYPE_MAX; i++) {
  199. bp_slots_histogram_free(&cpu_pinned[i]);
  200. bp_slots_histogram_free(&tsk_pinned_all[i]);
  201. }
  202. return -ENOMEM;
  203. }
  204. #endif
  205. static inline void
  206. bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
  207. {
  208. const int old_idx = old - 1;
  209. const int new_idx = old_idx + val;
  210. if (old_idx >= 0)
  211. WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
  212. if (new_idx >= 0)
  213. WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
  214. }
  215. static int
  216. bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
  217. {
  218. for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
  219. const int count = atomic_read(&hist->count[i]);
  220. /* Catch unexpected writers; we want a stable snapshot. */
  221. ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
  222. if (count > 0)
  223. return i + 1;
  224. WARN(count < 0, "inconsistent breakpoint slots histogram");
  225. }
  226. return 0;
  227. }
  228. static int
  229. bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
  230. enum bp_type_idx type)
  231. {
  232. for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
  233. const int count1 = atomic_read(&hist1->count[i]);
  234. const int count2 = atomic_read(&hist2->count[i]);
  235. /* Catch unexpected writers; we want a stable snapshot. */
  236. ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
  237. ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
  238. if (count1 + count2 > 0)
  239. return i + 1;
  240. WARN(count1 < 0, "inconsistent breakpoint slots histogram");
  241. WARN(count2 < 0, "inconsistent breakpoint slots histogram");
  242. }
  243. return 0;
  244. }
  245. #ifndef hw_breakpoint_weight
  246. static inline int hw_breakpoint_weight(struct perf_event *bp)
  247. {
  248. return 1;
  249. }
  250. #endif
  251. static inline enum bp_type_idx find_slot_idx(u64 bp_type)
  252. {
  253. if (bp_type & HW_BREAKPOINT_RW)
  254. return TYPE_DATA;
  255. return TYPE_INST;
  256. }
  257. /*
  258. * Return the maximum number of pinned breakpoints a task has in this CPU.
  259. */
  260. static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
  261. {
  262. struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
  263. /*
  264. * At this point we want to have acquired the bp_cpuinfo_sem as a
  265. * writer to ensure that there are no concurrent writers in
  266. * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
  267. */
  268. lockdep_assert_held_write(&bp_cpuinfo_sem);
  269. return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
  270. }
  271. /*
  272. * Count the number of breakpoints of the same type and same task.
  273. * The given event must be not on the list.
  274. *
  275. * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
  276. * returns a negative value.
  277. */
  278. static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
  279. {
  280. struct rhlist_head *head, *pos;
  281. struct perf_event *iter;
  282. int count = 0;
  283. /*
  284. * We need a stable snapshot of the per-task breakpoint list.
  285. */
  286. assert_bp_constraints_lock_held(bp);
  287. rcu_read_lock();
  288. head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
  289. if (!head)
  290. goto out;
  291. rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
  292. if (find_slot_idx(iter->attr.bp_type) != type)
  293. continue;
  294. if (iter->cpu >= 0) {
  295. if (cpu == -1) {
  296. count = -1;
  297. goto out;
  298. } else if (cpu != iter->cpu)
  299. continue;
  300. }
  301. count += hw_breakpoint_weight(iter);
  302. }
  303. out:
  304. rcu_read_unlock();
  305. return count;
  306. }
  307. static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
  308. {
  309. if (bp->cpu >= 0)
  310. return cpumask_of(bp->cpu);
  311. return cpu_possible_mask;
  312. }
  313. /*
  314. * Returns the max pinned breakpoint slots in a given
  315. * CPU (cpu > -1) or across all of them (cpu = -1).
  316. */
  317. static int
  318. max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
  319. {
  320. const struct cpumask *cpumask = cpumask_of_bp(bp);
  321. int pinned_slots = 0;
  322. int cpu;
  323. if (bp->hw.target && bp->cpu < 0) {
  324. int max_pinned = task_bp_pinned(-1, bp, type);
  325. if (max_pinned >= 0) {
  326. /*
  327. * Fast path: task_bp_pinned() is CPU-independent and
  328. * returns the same value for any CPU.
  329. */
  330. max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
  331. return max_pinned;
  332. }
  333. }
  334. for_each_cpu(cpu, cpumask) {
  335. struct bp_cpuinfo *info = get_bp_info(cpu, type);
  336. int nr;
  337. nr = info->cpu_pinned;
  338. if (!bp->hw.target)
  339. nr += max_task_bp_pinned(cpu, type);
  340. else
  341. nr += task_bp_pinned(cpu, bp, type);
  342. pinned_slots = max(nr, pinned_slots);
  343. }
  344. return pinned_slots;
  345. }
/*
 * Add/remove the given breakpoint in our constraint table
 *
 * @bp:     the breakpoint being accounted
 * @enable: true to account @bp, false to drop its accounting
 * @type:   constraints table index of @bp
 * @weight: number of slots @bp occupies; negated internally when !@enable
 *
 * Returns 0 on success, or the error returned by rhltable_remove() /
 * rhltable_insert() for task breakpoints.
 */
static int
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
{
	int cpu, next_tsk_pinned;

	/* From here on, @weight is the signed delta to apply. */
	if (!enable)
		weight = -weight;

	if (!bp->hw.target) {
		/*
		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
		 * global histogram.
		 */
		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);

		lockdep_assert_held_write(&bp_cpuinfo_sem);
		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
		info->cpu_pinned += weight;
		return 0;
	}

	/*
	 * If bp->hw.target, tsk_pinned is only modified, but not used
	 * otherwise. We can permit concurrent updates as long as there are no
	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
	 * concurrent updates here. Uses of tsk_pinned will require acquiring
	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
	 */
	lockdep_assert_held_read(&bp_cpuinfo_sem);

	/*
	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
	 * histogram. We need to take care of 4 cases (1, 2.a, 2.b and 3):
	 *
	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
	 *     exist other task breakpoints targeting all CPUs. In this case we
	 *     can simply update the global slots histogram.
	 *
	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
	 *     only exist other task breakpoints targeting all CPUs.
	 *
	 *     a. On enable: remove the existing breakpoints from the global
	 *        slots histogram and use the per-CPU histogram.
	 *
	 *     b. On disable: re-insert the existing breakpoints into the global
	 *        slots histogram and remove from per-CPU histogram.
	 *
	 *  3. Some other existing task breakpoints target specific CPUs. Only
	 *     update the per-CPU slots histogram.
	 */

	if (!enable) {
		/*
		 * Remove before updating histograms so we can determine if this
		 * was the last task breakpoint for a specific CPU.
		 */
		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

		if (ret)
			return ret;
	}
	/*
	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
	 */
	next_tsk_pinned = task_bp_pinned(-1, bp, type);

	/* >= 0 means no other breakpoint of this task/type is bound to a CPU. */
	if (next_tsk_pinned >= 0) {
		if (bp->cpu < 0) { /* Case 1: fast path */
			/* On disable, start from the count that still included @bp. */
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
		} else if (enable) { /* Case 2.a: slow path */
			/* Add existing to per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       0, next_tsk_pinned);
			}
			/* Add this first CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
					       -next_tsk_pinned);
		} else { /* Case 2.b: slow path */
			/* Remove this last CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
			/* Remove all from per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       next_tsk_pinned, -next_tsk_pinned);
			}
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
		}
	} else { /* Case 3: slow path */
		const struct cpumask *cpumask = cpumask_of_bp(bp);

		for_each_cpu(cpu, cpumask) {
			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
		}
	}

	/*
	 * Readers want a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	/*
	 * NOTE(review): the histograms above have already been updated when
	 * rhltable_insert() runs; if it fails, this function returns the error
	 * without undoing those updates. Confirm whether that is acceptable.
	 */
	if (enable)
		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

	return 0;
}
  454. /*
  455. * Constraints to check before allowing this new breakpoint counter.
  456. *
  457. * Note: Flexible breakpoints are currently unimplemented, but outlined in the
  458. * below algorithm for completeness. The implementation treats flexible as
  459. * pinned due to no guarantee that we currently always schedule flexible events
  460. * before a pinned event in a same CPU.
  461. *
  462. * == Non-pinned counter == (Considered as pinned for now)
  463. *
  464. * - If attached to a single cpu, check:
  465. *
  466. * (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
  467. * + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
  468. *
  469. * -> If there are already non-pinned counters in this cpu, it means
  470. * there is already a free slot for them.
  471. * Otherwise, we check that the maximum number of per task
  472. * breakpoints (for this cpu) plus the number of per cpu breakpoint
  473. * (for this cpu) doesn't cover every registers.
  474. *
  475. * - If attached to every cpus, check:
  476. *
  477. * (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
  478. * + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
  479. *
  480. * -> This is roughly the same, except we check the number of per cpu
  481. * bp for every cpu and we keep the max one. Same for the per tasks
  482. * breakpoints.
  483. *
  484. *
  485. * == Pinned counter ==
  486. *
  487. * - If attached to a single cpu, check:
  488. *
  489. * ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
  490. * + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
  491. *
  492. * -> Same checks as before. But now the info->flexible, if any, must keep
  493. * one register at least (or they will never be fed).
  494. *
  495. * - If attached to every cpus, check:
  496. *
  497. * ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
  498. * + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
  499. */
  500. static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
  501. {
  502. enum bp_type_idx type;
  503. int max_pinned_slots;
  504. int weight;
  505. /* We couldn't initialize breakpoint constraints on boot */
  506. if (!constraints_initialized)
  507. return -ENOMEM;
  508. /* Basic checks */
  509. if (bp_type == HW_BREAKPOINT_EMPTY ||
  510. bp_type == HW_BREAKPOINT_INVALID)
  511. return -EINVAL;
  512. type = find_slot_idx(bp_type);
  513. weight = hw_breakpoint_weight(bp);
  514. /* Check if this new breakpoint can be satisfied across all CPUs. */
  515. max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
  516. if (max_pinned_slots > hw_breakpoint_slots_cached(type))
  517. return -ENOSPC;
  518. return toggle_bp_slot(bp, true, type, weight);
  519. }
  520. int reserve_bp_slot(struct perf_event *bp)
  521. {
  522. struct mutex *mtx = bp_constraints_lock(bp);
  523. int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
  524. bp_constraints_unlock(mtx);
  525. return ret;
  526. }
  527. static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
  528. {
  529. enum bp_type_idx type;
  530. int weight;
  531. type = find_slot_idx(bp_type);
  532. weight = hw_breakpoint_weight(bp);
  533. WARN_ON(toggle_bp_slot(bp, false, type, weight));
  534. }
  535. void release_bp_slot(struct perf_event *bp)
  536. {
  537. struct mutex *mtx = bp_constraints_lock(bp);
  538. __release_bp_slot(bp, bp->attr.bp_type);
  539. bp_constraints_unlock(mtx);
  540. }
  541. static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
  542. {
  543. int err;
  544. __release_bp_slot(bp, old_type);
  545. err = __reserve_bp_slot(bp, new_type);
  546. if (err) {
  547. /*
  548. * Reserve the old_type slot back in case
  549. * there's no space for the new type.
  550. *
  551. * This must succeed, because we just released
  552. * the old_type slot in the __release_bp_slot
  553. * call above. If not, something is broken.
  554. */
  555. WARN_ON(__reserve_bp_slot(bp, old_type));
  556. }
  557. return err;
  558. }
  559. static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
  560. {
  561. struct mutex *mtx = bp_constraints_lock(bp);
  562. int ret = __modify_bp_slot(bp, old_type, new_type);
  563. bp_constraints_unlock(mtx);
  564. return ret;
  565. }
  566. /*
  567. * Allow the kernel debugger to reserve breakpoint slots without
  568. * taking a lock using the dbg_* variant of for the reserve and
  569. * release breakpoint slots.
  570. */
  571. int dbg_reserve_bp_slot(struct perf_event *bp)
  572. {
  573. int ret;
  574. if (bp_constraints_is_locked(bp))
  575. return -1;
  576. /* Locks aren't held; disable lockdep assert checking. */
  577. lockdep_off();
  578. ret = __reserve_bp_slot(bp, bp->attr.bp_type);
  579. lockdep_on();
  580. return ret;
  581. }
  582. int dbg_release_bp_slot(struct perf_event *bp)
  583. {
  584. if (bp_constraints_is_locked(bp))
  585. return -1;
  586. /* Locks aren't held; disable lockdep assert checking. */
  587. lockdep_off();
  588. __release_bp_slot(bp, bp->attr.bp_type);
  589. lockdep_on();
  590. return 0;
  591. }
  592. static int hw_breakpoint_parse(struct perf_event *bp,
  593. const struct perf_event_attr *attr,
  594. struct arch_hw_breakpoint *hw)
  595. {
  596. int err;
  597. err = hw_breakpoint_arch_parse(bp, attr, hw);
  598. if (err)
  599. return err;
  600. if (arch_check_bp_in_kernelspace(hw)) {
  601. if (attr->exclude_kernel)
  602. return -EINVAL;
  603. /*
  604. * Don't let unprivileged users set a breakpoint in the trap
  605. * path to avoid trap recursion attacks.
  606. */
  607. if (!capable(CAP_SYS_ADMIN))
  608. return -EPERM;
  609. }
  610. return 0;
  611. }
  612. int register_perf_hw_breakpoint(struct perf_event *bp)
  613. {
  614. struct arch_hw_breakpoint hw = { };
  615. int err;
  616. err = reserve_bp_slot(bp);
  617. if (err)
  618. return err;
  619. err = hw_breakpoint_parse(bp, &bp->attr, &hw);
  620. if (err) {
  621. release_bp_slot(bp);
  622. return err;
  623. }
  624. bp->hw.info = hw;
  625. return 0;
  626. }
  627. /**
  628. * register_user_hw_breakpoint - register a hardware breakpoint for user space
  629. * @attr: breakpoint attributes
  630. * @triggered: callback to trigger when we hit the breakpoint
  631. * @context: context data could be used in the triggered callback
  632. * @tsk: pointer to 'task_struct' of the process to which the address belongs
  633. */
  634. struct perf_event *
  635. register_user_hw_breakpoint(struct perf_event_attr *attr,
  636. perf_overflow_handler_t triggered,
  637. void *context,
  638. struct task_struct *tsk)
  639. {
  640. return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
  641. context);
  642. }
  643. EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
  644. static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
  645. struct perf_event_attr *from)
  646. {
  647. to->bp_addr = from->bp_addr;
  648. to->bp_type = from->bp_type;
  649. to->bp_len = from->bp_len;
  650. to->disabled = from->disabled;
  651. }
  652. int
  653. modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
  654. bool check)
  655. {
  656. struct arch_hw_breakpoint hw = { };
  657. int err;
  658. err = hw_breakpoint_parse(bp, attr, &hw);
  659. if (err)
  660. return err;
  661. if (check) {
  662. struct perf_event_attr old_attr;
  663. old_attr = bp->attr;
  664. hw_breakpoint_copy_attr(&old_attr, attr);
  665. if (memcmp(&old_attr, attr, sizeof(*attr)))
  666. return -EINVAL;
  667. }
  668. if (bp->attr.bp_type != attr->bp_type) {
  669. err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
  670. if (err)
  671. return err;
  672. }
  673. hw_breakpoint_copy_attr(&bp->attr, attr);
  674. bp->hw.info = hw;
  675. return 0;
  676. }
  677. /**
  678. * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
  679. * @bp: the breakpoint structure to modify
  680. * @attr: new breakpoint attributes
  681. */
  682. int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
  683. {
  684. int err;
  685. /*
  686. * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
  687. * will not be possible to raise IPIs that invoke __perf_event_disable.
  688. * So call the function directly after making sure we are targeting the
  689. * current task.
  690. */
  691. if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
  692. perf_event_disable_local(bp);
  693. else
  694. perf_event_disable(bp);
  695. err = modify_user_hw_breakpoint_check(bp, attr, false);
  696. if (!bp->attr.disabled)
  697. perf_event_enable(bp);
  698. return err;
  699. }
  700. EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
  701. /**
  702. * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
  703. * @bp: the breakpoint structure to unregister
  704. */
  705. void unregister_hw_breakpoint(struct perf_event *bp)
  706. {
  707. if (!bp)
  708. return;
  709. perf_event_release_kernel(bp);
  710. }
  711. EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  712. /**
  713. * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
  714. * @attr: breakpoint attributes
  715. * @triggered: callback to trigger when we hit the breakpoint
  716. * @context: context data could be used in the triggered callback
  717. *
  718. * @return a set of per_cpu pointers to perf events
  719. */
  720. struct perf_event * __percpu *
  721. register_wide_hw_breakpoint(struct perf_event_attr *attr,
  722. perf_overflow_handler_t triggered,
  723. void *context)
  724. {
  725. struct perf_event * __percpu *cpu_events, *bp;
  726. long err = 0;
  727. int cpu;
  728. cpu_events = alloc_percpu(typeof(*cpu_events));
  729. if (!cpu_events)
  730. return ERR_PTR_PCPU(-ENOMEM);
  731. cpus_read_lock();
  732. for_each_online_cpu(cpu) {
  733. bp = perf_event_create_kernel_counter(attr, cpu, NULL,
  734. triggered, context);
  735. if (IS_ERR(bp)) {
  736. err = PTR_ERR(bp);
  737. break;
  738. }
  739. per_cpu(*cpu_events, cpu) = bp;
  740. }
  741. cpus_read_unlock();
  742. if (likely(!err))
  743. return cpu_events;
  744. unregister_wide_hw_breakpoint(cpu_events);
  745. return ERR_PTR_PCPU(err);
  746. }
  747. EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
  748. /**
  749. * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
  750. * @cpu_events: the per cpu set of events to unregister
  751. */
  752. void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
  753. {
  754. int cpu;
  755. for_each_possible_cpu(cpu)
  756. unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
  757. free_percpu(cpu_events);
  758. }
  759. EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
  760. /**
  761. * hw_breakpoint_is_used - check if breakpoints are currently used
  762. *
  763. * Returns: true if breakpoints are used, false otherwise.
  764. */
  765. bool hw_breakpoint_is_used(void)
  766. {
  767. int cpu;
  768. if (!constraints_initialized)
  769. return false;
  770. for_each_possible_cpu(cpu) {
  771. for (int type = 0; type < TYPE_MAX; ++type) {
  772. struct bp_cpuinfo *info = get_bp_info(cpu, type);
  773. if (info->cpu_pinned)
  774. return true;
  775. for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
  776. if (atomic_read(&info->tsk_pinned.count[slot]))
  777. return true;
  778. }
  779. }
  780. }
  781. for (int type = 0; type < TYPE_MAX; ++type) {
  782. for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
  783. /*
  784. * Warn, because if there are CPU pinned counters,
  785. * should never get here; bp_cpuinfo::cpu_pinned should
  786. * be consistent with the global cpu_pinned histogram.
  787. */
  788. if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
  789. return true;
  790. if (atomic_read(&tsk_pinned_all[type].count[slot]))
  791. return true;
  792. }
  793. }
  794. return false;
  795. }
/*
 * Die-notifier block for breakpoint exceptions; registered by
 * init_hw_breakpoint() via register_die_notifier().
 */
static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};
  801. static void bp_perf_event_destroy(struct perf_event *event)
  802. {
  803. release_bp_slot(event);
  804. }
  805. static int hw_breakpoint_event_init(struct perf_event *bp)
  806. {
  807. int err;
  808. if (bp->attr.type != PERF_TYPE_BREAKPOINT)
  809. return -ENOENT;
  810. /*
  811. * Check if breakpoint type is supported before proceeding.
  812. * Also, no branch sampling for breakpoint events.
  813. */
  814. if (!hw_breakpoint_slots_cached(find_slot_idx(bp->attr.bp_type)) || has_branch_stack(bp))
  815. return -EOPNOTSUPP;
  816. err = register_perf_hw_breakpoint(bp);
  817. if (err)
  818. return err;
  819. bp->destroy = bp_perf_event_destroy;
  820. return 0;
  821. }
  822. static int hw_breakpoint_add(struct perf_event *bp, int flags)
  823. {
  824. if (!(flags & PERF_EF_START))
  825. bp->hw.state = PERF_HES_STOPPED;
  826. if (is_sampling_event(bp)) {
  827. bp->hw.last_period = bp->hw.sample_period;
  828. perf_swevent_set_period(bp);
  829. }
  830. return arch_install_hw_breakpoint(bp);
  831. }
  832. static void hw_breakpoint_del(struct perf_event *bp, int flags)
  833. {
  834. arch_uninstall_hw_breakpoint(bp);
  835. }
  836. static void hw_breakpoint_start(struct perf_event *bp, int flags)
  837. {
  838. bp->hw.state = 0;
  839. }
  840. static void hw_breakpoint_stop(struct perf_event *bp, int flags)
  841. {
  842. bp->hw.state = PERF_HES_STOPPED;
  843. }
/*
 * The breakpoint PMU; registered by init_hw_breakpoint() under the name
 * "breakpoint" for PERF_TYPE_BREAKPOINT events.
 */
static struct pmu perf_breakpoint = {
	.task_ctx_nr = perf_sw_context, /* could eventually get its own */
	.event_init = hw_breakpoint_event_init,
	.add = hw_breakpoint_add,
	.del = hw_breakpoint_del,
	.start = hw_breakpoint_start,
	.stop = hw_breakpoint_stop,
	.read = hw_breakpoint_pmu_read,
};
  853. int __init init_hw_breakpoint(void)
  854. {
  855. int ret;
  856. ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
  857. if (ret)
  858. return ret;
  859. ret = init_breakpoint_slots();
  860. if (ret)
  861. return ret;
  862. constraints_initialized = true;
  863. perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
  864. return register_die_notifier(&hw_breakpoint_exceptions_nb);
  865. }