histogram-design.rst 86 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118
  1. .. SPDX-License-Identifier: GPL-2.0
  2. ======================
  3. Histogram Design Notes
  4. ======================
  5. :Author: Tom Zanussi <zanussi@kernel.org>
  6. This document attempts to provide a description of how the ftrace
  7. histograms work and how the individual pieces map to the data
  8. structures used to implement them in trace_events_hist.c and
  9. tracing_map.c.
  10. .. note::
  11. All the ftrace histogram command examples assume the working
  12. directory is the ftrace /tracing directory. For example::
  13. # cd /sys/kernel/tracing
  14. Also, the histogram output displayed for those commands will be
  15. generally be truncated - only enough to make the point is displayed.
  16. 'hist_debug' trace event files
  17. ==============================
  18. If the kernel is compiled with CONFIG_HIST_TRIGGERS_DEBUG set, an
  19. event file named 'hist_debug' will appear in each event's
  20. subdirectory. This file can be read at any time and will display some
  21. of the hist trigger internals described in this document. Specific
  22. examples and output will be described in test cases below.
  23. Basic histograms
  24. ================
  25. First, basic histograms. Below is pretty much the simplest thing you
  26. can do with histograms - create one with a single key on a single
  27. event and cat the output::
  28. # echo 'hist:keys=pid' >> events/sched/sched_waking/trigger
  29. # cat events/sched/sched_waking/hist
  30. { pid: 18249 } hitcount: 1
  31. { pid: 13399 } hitcount: 1
  32. { pid: 17973 } hitcount: 1
  33. { pid: 12572 } hitcount: 1
  34. ...
  35. { pid: 10 } hitcount: 921
  36. { pid: 18255 } hitcount: 1444
  37. { pid: 25526 } hitcount: 2055
  38. { pid: 5257 } hitcount: 2055
  39. { pid: 27367 } hitcount: 2055
  40. { pid: 1728 } hitcount: 2161
  41. Totals:
  42. Hits: 21305
  43. Entries: 183
  44. Dropped: 0
  45. What this does is create a histogram on the sched_waking event using
  46. pid as a key and with a single value, hitcount, which even if not
  47. explicitly specified, exists for every histogram regardless.
  48. The hitcount value is a per-bucket value that's automatically
  49. incremented on every hit for the given key, which in this case is the
  50. pid.
  51. So in this histogram, there's a separate bucket for each pid, and each
  52. bucket contains a value for that bucket, counting the number of times
  53. sched_waking was called for that pid.
  54. Each histogram is represented by a hist_data struct.
  55. To keep track of each key and value field in the histogram, hist_data
  56. keeps an array of these fields named fields[]. The fields[] array is
  57. an array containing struct hist_field representations of each
  58. histogram val and key in the histogram (variables are also included
  59. here, but are discussed later). So for the above histogram we have one
  60. key and one value; in this case the one value is the hitcount value,
  61. which all histograms have, regardless of whether they define that
  62. value or not, which the above histogram does not.
  63. Each struct hist_field contains a pointer to the ftrace_event_field
  64. from the event's trace_event_file along with various bits related to
  65. that such as the size, offset, type, and a hist_field_fn_t function,
  66. which is used to grab the field's data from the ftrace event buffer
  67. (in most cases - some hist_fields such as hitcount don't directly map
  68. to an event field in the trace buffer - in these cases the function
  69. implementation gets its value from somewhere else). The flags field
  70. indicates which type of field it is - key, value, variable, variable
  71. reference, etc., with value being the default.
  72. The other important hist_data data structure in addition to the
  73. fields[] array is the tracing_map instance created for the histogram,
  74. which is held in the .map member. The tracing_map implements the
  75. lock-free hash table used to implement histograms (see
  76. kernel/trace/tracing_map.h for much more discussion about the
  77. low-level data structures implementing the tracing_map). For the
  78. purposes of this discussion, the tracing_map contains a number of
  79. buckets, each bucket corresponding to a particular tracing_map_elt
  80. object hashed by a given histogram key.
  81. Below is a diagram the first part of which describes the hist_data and
  82. associated key and value fields for the histogram described above. As
  83. you can see, there are two fields in the fields array, one val field
  84. for the hitcount and one key field for the pid key.
  85. Below that is a diagram of a run-time snapshot of what the tracing_map
  86. might look like for a given run. It attempts to show the
  87. relationships between the hist_data fields and the tracing_map
  88. elements for a couple hypothetical keys and values.::
  89. +------------------+
  90. | hist_data |
  91. +------------------+ +----------------+
  92. | .fields[] |---->| val = hitcount |----------------------------+
  93. +----------------+ +----------------+ |
  94. | .map | | .size | |
  95. +----------------+ +--------------+ |
  96. | .offset | |
  97. +--------------+ |
  98. | .fn() | |
  99. +--------------+ |
  100. . |
  101. . |
  102. . |
  103. +----------------+ <--- n_vals |
  104. | key = pid |----------------------------|--+
  105. +----------------+ | |
  106. | .size | | |
  107. +--------------+ | |
  108. | .offset | | |
  109. +--------------+ | |
  110. | .fn() | | |
  111. +----------------+ <--- n_fields | |
  112. | unused | | |
  113. +----------------+ | |
  114. | | | |
  115. +--------------+ | |
  116. | | | |
  117. +--------------+ | |
  118. | | | |
  119. +--------------+ | |
  120. n_keys = n_fields - n_vals | |
  121. The hist_data n_vals and n_fields delineate the extent of the fields[]
  122. array and separate keys from values for the rest of the code.
  123. Below is a run-time representation of the tracing_map part of the
  124. histogram, with pointers from various parts of the fields[] array
  125. to corresponding parts of the tracing_map.
  126. The tracing_map consists of an array of tracing_map_entrys and a set
  127. of preallocated tracing_map_elts (abbreviated below as map_entry and
  128. map_elt). The total number of map_entrys in the hist_data.map array =
  129. map->max_elts (actually map->map_size but only max_elts of those are
  130. used. This is a property required by the map_insert() algorithm).
  131. If a map_entry is unused, meaning no key has yet hashed into it, its
  132. .key value is 0 and its .val pointer is NULL. Once a map_entry has
  133. been claimed, the .key value contains the key's hash value and the
  134. .val member points to a map_elt containing the full key and an entry
  135. for each key or value in the map_elt.fields[] array. There is an
  136. entry in the map_elt.fields[] array corresponding to each hist_field
  137. in the histogram, and this is where the continually aggregated sums
  138. corresponding to each histogram value are kept.
  139. The diagram attempts to show the relationship between the
  140. hist_data.fields[] and the map_elt.fields[] with the links drawn
  141. between diagrams::
  142. +-----------+ | |
  143. | hist_data | | |
  144. +-----------+ | |
  145. | .fields | | |
  146. +---------+ +-----------+ | |
  147. | .map |---->| map_entry | | |
  148. +---------+ +-----------+ | |
  149. | .key |---> 0 | |
  150. +---------+ | |
  151. | .val |---> NULL | |
  152. +-----------+ | |
  153. | map_entry | | |
  154. +-----------+ | |
  155. | .key |---> pid = 999 | |
  156. +---------+ +-----------+ | |
  157. | .val |--->| map_elt | | |
  158. +---------+ +-----------+ | |
  159. . | .key |---> full key * | |
  160. . +---------+ +---------------+ | |
  161. . | .fields |--->| .sum (val) |<-+ |
  162. +-----------+ +---------+ | 2345 | | |
  163. | map_entry | +---------------+ | |
  164. +-----------+ | .offset (key) |<----+
  165. | .key |---> 0 | 0 | | |
  166. +---------+ +---------------+ | |
  167. | .val |---> NULL . | |
  168. +-----------+ . | |
  169. | map_entry | . | |
  170. +-----------+ +---------------+ | |
  171. | .key | | .sum (val) or | | |
  172. +---------+ +---------+ | .offset (key) | | |
  173. | .val |--->| map_elt | +---------------+ | |
  174. +-----------+ +---------+ | .sum (val) or | | |
  175. | map_entry | | .offset (key) | | |
  176. +-----------+ +---------------+ | |
  177. | .key |---> pid = 4444 | |
  178. +---------+ +-----------+ | |
  179. | .val | | map_elt | | |
  180. +---------+ +-----------+ | |
  181. | .key |---> full key * | |
  182. +---------+ +---------------+ | |
  183. | .fields |--->| .sum (val) |<-+ |
  184. +---------+ | 65523 | |
  185. +---------------+ |
  186. | .offset (key) |<----+
  187. | 0 |
  188. +---------------+
  189. .
  190. .
  191. .
  192. +---------------+
  193. | .sum (val) or |
  194. | .offset (key) |
  195. +---------------+
  196. | .sum (val) or |
  197. | .offset (key) |
  198. +---------------+
  199. Abbreviations used in the diagrams::
  200. hist_data = struct hist_trigger_data
  201. hist_data.fields = struct hist_field
  202. fn = hist_field_fn_t
  203. map_entry = struct tracing_map_entry
  204. map_elt = struct tracing_map_elt
  205. map_elt.fields = struct tracing_map_field
  206. Whenever a new event occurs and it has a hist trigger associated with
  207. it, event_hist_trigger() is called. event_hist_trigger() first deals
  208. with the key: for each subkey in the key (in the above example, there
  209. is just one subkey corresponding to pid), the hist_field that
  210. represents that subkey is retrieved from hist_data.fields[] and the
  211. hist_field_fn_t fn() associated with that field, along with the
  212. field's size and offset, is used to grab that subkey's data from the
  213. current trace record.
  214. Once the complete key has been retrieved, it's used to look that key
  215. up in the tracing_map. If there's no tracing_map_elt associated with
  216. that key, an empty one is claimed and inserted in the map for the new
  217. key. In either case, the tracing_map_elt associated with that key is
  218. returned.
  219. Once a tracing_map_elt available, hist_trigger_elt_update() is called.
  220. As the name implies, this updates the element, which basically means
  221. updating the element's fields. There's a tracing_map_field associated
  222. with each key and value in the histogram, and each of these correspond
  223. to the key and value hist_fields created when the histogram was
  224. created. hist_trigger_elt_update() goes through each value hist_field
  225. and, as for the keys, uses the hist_field's fn() and size and offset
  226. to grab the field's value from the current trace record. Once it has
  227. that value, it simply adds that value to that field's
  228. continually-updated tracing_map_field.sum member. Some hist_field
  229. fn()s, such as for the hitcount, don't actually grab anything from the
  230. trace record (the hitcount fn() just increments the counter sum by 1),
  231. but the idea is the same.
  232. Once all the values have been updated, hist_trigger_elt_update() is
  233. done and returns. Note that there are also tracing_map_fields for
  234. each subkey in the key, but hist_trigger_elt_update() doesn't look at
  235. them or update anything - those exist only for sorting, which can
  236. happen later.
  237. Basic histogram test
  238. --------------------
  239. This is a good example to try. It produces 3 value fields and 2 key
  240. fields in the output::
  241. # echo 'hist:keys=common_pid,call_site.sym:values=bytes_req,bytes_alloc,hitcount' >> events/kmem/kmalloc/trigger
  242. To see the debug data, cat the kmem/kmalloc's 'hist_debug' file. It
  243. will show the trigger info of the histogram it corresponds to, along
  244. with the address of the hist_data associated with the histogram, which
  245. will become useful in later examples. It then displays the number of
  246. total hist_fields associated with the histogram along with a count of
  247. how many of those correspond to keys and how many correspond to values.
  248. It then goes on to display details for each field, including the
  249. field's flags and the position of each field in the hist_data's
  250. fields[] array, which is useful information for verifying that things
  251. internally appear correct or not, and which again will become even
  252. more useful in further examples::
  253. # cat events/kmem/kmalloc/hist_debug
  254. # event histogram
  255. #
  256. # trigger info: hist:keys=common_pid,call_site.sym:vals=hitcount,bytes_req,bytes_alloc:sort=hitcount:size=2048 [active]
  257. #
  258. hist_data: 000000005e48c9a5
  259. n_vals: 3
  260. n_keys: 2
  261. n_fields: 5
  262. val fields:
  263. hist_data->fields[0]:
  264. flags:
  265. VAL: HIST_FIELD_FL_HITCOUNT
  266. type: u64
  267. size: 8
  268. is_signed: 0
  269. hist_data->fields[1]:
  270. flags:
  271. VAL: normal u64 value
  272. ftrace_event_field name: bytes_req
  273. type: size_t
  274. size: 8
  275. is_signed: 0
  276. hist_data->fields[2]:
  277. flags:
  278. VAL: normal u64 value
  279. ftrace_event_field name: bytes_alloc
  280. type: size_t
  281. size: 8
  282. is_signed: 0
  283. key fields:
  284. hist_data->fields[3]:
  285. flags:
  286. HIST_FIELD_FL_KEY
  287. ftrace_event_field name: common_pid
  288. type: int
  289. size: 8
  290. is_signed: 1
  291. hist_data->fields[4]:
  292. flags:
  293. HIST_FIELD_FL_KEY
  294. ftrace_event_field name: call_site
  295. type: unsigned long
  296. size: 8
  297. is_signed: 0
  298. The commands below can be used to clean things up for the next test::
  299. # echo '!hist:keys=common_pid,call_site.sym:values=bytes_req,bytes_alloc,hitcount' >> events/kmem/kmalloc/trigger
  300. Variables
  301. =========
  302. Variables allow data from one hist trigger to be saved by one hist
  303. trigger and retrieved by another hist trigger. For example, a trigger
  304. on the sched_waking event can capture a timestamp for a particular
  305. pid, and later a sched_switch event that switches to that pid event
  306. can grab the timestamp and use it to calculate a time delta between
  307. the two events::
  308. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >>
  309. events/sched/sched_waking/trigger
  310. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >>
  311. events/sched/sched_switch/trigger
  312. In terms of the histogram data structures, variables are implemented
  313. as another type of hist_field and for a given hist trigger are added
  314. to the hist_data.fields[] array just after all the val fields. To
  315. distinguish them from the existing key and val fields, they're given a
  316. new flag type, HIST_FIELD_FL_VAR (abbreviated FL_VAR) and they also
  317. make use of a new .var.idx field member in struct hist_field, which
  318. maps them to an index in a new map_elt.vars[] array added to the
  319. map_elt specifically designed to store and retrieve variable values.
  320. The diagram below shows those new elements and adds a new variable
  321. entry, ts0, corresponding to the ts0 variable in the sched_waking
  322. trigger above.
  323. sched_waking histogram
  324. ----------------------
  325. .. code-block::
  326. +------------------+
  327. | hist_data |<-------------------------------------------------------+
  328. +------------------+ +-------------------+ |
  329. | .fields[] |-->| val = hitcount | |
  330. +----------------+ +-------------------+ |
  331. | .map | | .size | |
  332. +----------------+ +-----------------+ |
  333. | .offset | |
  334. +-----------------+ |
  335. | .fn() | |
  336. +-----------------+ |
  337. | .flags | |
  338. +-----------------+ |
  339. | .var.idx | |
  340. +-------------------+ |
  341. | var = ts0 | |
  342. +-------------------+ |
  343. | .size | |
  344. +-----------------+ |
  345. | .offset | |
  346. +-----------------+ |
  347. | .fn() | |
  348. +-----------------+ |
  349. | .flags & FL_VAR | |
  350. +-----------------+ |
  351. | .var.idx |----------------------------+-+ |
  352. +-----------------+ | | |
  353. . | | |
  354. . | | |
  355. . | | |
  356. +-------------------+ <--- n_vals | | |
  357. | key = pid | | | |
  358. +-------------------+ | | |
  359. | .size | | | |
  360. +-----------------+ | | |
  361. | .offset | | | |
  362. +-----------------+ | | |
  363. | .fn() | | | |
  364. +-----------------+ | | |
  365. | .flags & FL_KEY | | | |
  366. +-----------------+ | | |
  367. | .var.idx | | | |
  368. +-------------------+ <--- n_fields | | |
  369. | unused | | | |
  370. +-------------------+ | | |
  371. | | | | |
  372. +-----------------+ | | |
  373. | | | | |
  374. +-----------------+ | | |
  375. | | | | |
  376. +-----------------+ | | |
  377. | | | | |
  378. +-----------------+ | | |
  379. | | | | |
  380. +-----------------+ | | |
  381. n_keys = n_fields - n_vals | | |
  382. | | |
  383. This is very similar to the basic case. In the above diagram, we can
  384. see a new .flags member has been added to the struct hist_field
  385. struct, and a new entry added to hist_data.fields representing the ts0
  386. variable. For a normal val hist_field, .flags is just 0 (modulo
  387. modifier flags), but if the value is defined as a variable, the .flags
  388. contains a set FL_VAR bit.
  389. As you can see, the ts0 entry's .var.idx member contains the index
  390. into the tracing_map_elts' .vars[] array containing variable values.
  391. This idx is used whenever the value of the variable is set or read.
  392. The map_elt.vars idx assigned to the given variable is assigned and
  393. saved in .var.idx by create_tracing_map_fields() after it calls
  394. tracing_map_add_var().
  395. Below is a representation of the histogram at run-time, which
  396. populates the map, along with correspondence to the above hist_data and
  397. hist_field data structures.
  398. The diagram attempts to show the relationship between the
  399. hist_data.fields[] and the map_elt.fields[] and map_elt.vars[] with
  400. the links drawn between diagrams. For each of the map_elts, you can
  401. see that the .fields[] members point to the .sum or .offset of a key
  402. or val and the .vars[] members point to the value of a variable. The
  403. arrows between the two diagrams show the linkages between those
  404. tracing_map members and the field definitions in the corresponding
  405. hist_data fields[] members.::
  406. +-----------+ | | |
  407. | hist_data | | | |
  408. +-----------+ | | |
  409. | .fields | | | |
  410. +---------+ +-----------+ | | |
  411. | .map |---->| map_entry | | | |
  412. +---------+ +-----------+ | | |
  413. | .key |---> 0 | | |
  414. +---------+ | | |
  415. | .val |---> NULL | | |
  416. +-----------+ | | |
  417. | map_entry | | | |
  418. +-----------+ | | |
  419. | .key |---> pid = 999 | | |
  420. +---------+ +-----------+ | | |
  421. | .val |--->| map_elt | | | |
  422. +---------+ +-----------+ | | |
  423. . | .key |---> full key * | | |
  424. . +---------+ +---------------+ | | |
  425. . | .fields |--->| .sum (val) | | | |
  426. . +---------+ | 2345 | | | |
  427. . +--| .vars | +---------------+ | | |
  428. . | +---------+ | .offset (key) | | | |
  429. . | | 0 | | | |
  430. . | +---------------+ | | |
  431. . | . | | |
  432. . | . | | |
  433. . | . | | |
  434. . | +---------------+ | | |
  435. . | | .sum (val) or | | | |
  436. . | | .offset (key) | | | |
  437. . | +---------------+ | | |
  438. . | | .sum (val) or | | | |
  439. . | | .offset (key) | | | |
  440. . | +---------------+ | | |
  441. . | | | |
  442. . +---------------->+---------------+ | | |
  443. . | ts0 |<--+ | |
  444. . | 113345679876 | | | |
  445. . +---------------+ | | |
  446. . | unused | | | |
  447. . | | | | |
  448. . +---------------+ | | |
  449. . . | | |
  450. . . | | |
  451. . . | | |
  452. . +---------------+ | | |
  453. . | unused | | | |
  454. . | | | | |
  455. . +---------------+ | | |
  456. . | unused | | | |
  457. . | | | | |
  458. . +---------------+ | | |
  459. . | | |
  460. +-----------+ | | |
  461. | map_entry | | | |
  462. +-----------+ | | |
  463. | .key |---> pid = 4444 | | |
  464. +---------+ +-----------+ | | |
  465. | .val |--->| map_elt | | | |
  466. +---------+ +-----------+ | | |
  467. . | .key |---> full key * | | |
  468. . +---------+ +---------------+ | | |
  469. . | .fields |--->| .sum (val) | | | |
  470. +---------+ | 2345 | | | |
  471. +--| .vars | +---------------+ | | |
  472. | +---------+ | .offset (key) | | | |
  473. | | 0 | | | |
  474. | +---------------+ | | |
  475. | . | | |
  476. | . | | |
  477. | . | | |
  478. | +---------------+ | | |
  479. | | .sum (val) or | | | |
  480. | | .offset (key) | | | |
  481. | +---------------+ | | |
  482. | | .sum (val) or | | | |
  483. | | .offset (key) | | | |
  484. | +---------------+ | | |
  485. | | | |
  486. | +---------------+ | | |
  487. +---------------->| ts0 |<--+ | |
  488. | 213499240729 | | |
  489. +---------------+ | |
  490. | unused | | |
  491. | | | |
  492. +---------------+ | |
  493. . | |
  494. . | |
  495. . | |
  496. +---------------+ | |
  497. | unused | | |
  498. | | | |
  499. +---------------+ | |
  500. | unused | | |
  501. | | | |
  502. +---------------+ | |
  503. For each used map entry, there's a map_elt pointing to an array of
  504. .vars containing the current value of the variables associated with
  505. that histogram entry. So in the above, the timestamp associated with
  506. pid 999 is 113345679876, and the timestamp variable in the same
  507. .var.idx for pid 4444 is 213499240729.
  508. sched_switch histogram
  509. ----------------------
  510. The sched_switch histogram paired with the above sched_waking
  511. histogram is shown below. The most important aspect of the
  512. sched_switch histogram is that it references a variable on the
  513. sched_waking histogram above.
  514. The histogram diagram is very similar to the others so far displayed,
  515. but it adds variable references. You can see the normal hitcount and
  516. key fields along with a new wakeup_lat variable implemented in the
  517. same way as the sched_waking ts0 variable, but in addition there's an
  518. entry with the new FL_VAR_REF (short for HIST_FIELD_FL_VAR_REF) flag.
  519. Associated with the new var ref field are a couple of new hist_field
  520. members, var.hist_data and var_ref_idx. For a variable reference, the
  521. var.hist_data goes with the var.idx, which together uniquely identify
  522. a particular variable on a particular histogram. The var_ref_idx is
  523. just the index into the var_ref_vals[] array that caches the values of
  524. each variable whenever a hist trigger is updated. Those resulting
  525. values are then finally accessed by other code such as trace action
  526. code that uses the var_ref_idx values to assign param values.
  527. The diagram below describes the situation for the sched_switch
  528. histogram referred to before::
  529. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >>
  530. events/sched/sched_switch/trigger
  531. | |
  532. +------------------+ | |
  533. | hist_data | | |
  534. +------------------+ +-----------------------+ | |
  535. | .fields[] |-->| val = hitcount | | |
  536. +----------------+ +-----------------------+ | |
  537. | .map | | .size | | |
  538. +----------------+ +---------------------+ | |
  539. +--| .var_refs[] | | .offset | | |
  540. | +----------------+ +---------------------+ | |
  541. | | .fn() | | |
  542. | var_ref_vals[] +---------------------+ | |
  543. | +-------------+ | .flags | | |
  544. | | $ts0 |<---+ +---------------------+ | |
  545. | +-------------+ | | .var.idx | | |
  546. | | | | +---------------------+ | |
  547. | +-------------+ | | .var.hist_data | | |
  548. | | | | +---------------------+ | |
  549. | +-------------+ | | .var_ref_idx | | |
  550. | | | | +-----------------------+ | |
  551. | +-------------+ | | var = wakeup_lat | | |
  552. | . | +-----------------------+ | |
  553. | . | | .size | | |
  554. | . | +---------------------+ | |
  555. | +-------------+ | | .offset | | |
  556. | | | | +---------------------+ | |
  557. | +-------------+ | | .fn() | | |
  558. | | | | +---------------------+ | |
  559. | +-------------+ | | .flags & FL_VAR | | |
  560. | | +---------------------+ | |
  561. | | | .var.idx | | |
  562. | | +---------------------+ | |
  563. | | | .var.hist_data | | |
  564. | | +---------------------+ | |
  565. | | | .var_ref_idx | | |
  566. | | +---------------------+ | |
  567. | | . | |
  568. | | . | |
  569. | | . | |
  570. | | +-----------------------+ <--- n_vals | |
  571. | | | key = pid | | |
  572. | | +-----------------------+ | |
  573. | | | .size | | |
  574. | | +---------------------+ | |
  575. | | | .offset | | |
  576. | | +---------------------+ | |
  577. | | | .fn() | | |
  578. | | +---------------------+ | |
  579. | | | .flags | | |
  580. | | +---------------------+ | |
  581. | | | .var.idx | | |
  582. | | +-----------------------+ <--- n_fields | |
  583. | | | unused | | |
  584. | | +-----------------------+ | |
  585. | | | | | |
  586. | | +---------------------+ | |
  587. | | | | | |
  588. | | +---------------------+ | |
  589. | | | | | |
  590. | | +---------------------+ | |
  591. | | | | | |
  592. | | +---------------------+ | |
  593. | | | | | |
  594. | | +---------------------+ | |
  595. | | n_keys = n_fields - n_vals | |
  596. | | | |
  597. | | | |
  598. | | +-----------------------+ | |
  599. +---------------------->| var_ref = $ts0 | | |
  600. | +-----------------------+ | |
  601. | | .size | | |
  602. | +---------------------+ | |
  603. | | .offset | | |
  604. | +---------------------+ | |
  605. | | .fn() | | |
  606. | +---------------------+ | |
  607. | | .flags & FL_VAR_REF | | |
  608. | +---------------------+ | |
  609. | | .var.idx |--------------------------+ |
  610. | +---------------------+ |
  611. | | .var.hist_data |----------------------------+
  612. | +---------------------+
  613. +---| .var_ref_idx |
  614. +---------------------+
  615. Abbreviations used in the diagrams::
  616. hist_data = struct hist_trigger_data
  617. hist_data.fields = struct hist_field
  618. fn = hist_field_fn_t
  619. FL_KEY = HIST_FIELD_FL_KEY
  620. FL_VAR = HIST_FIELD_FL_VAR
  621. FL_VAR_REF = HIST_FIELD_FL_VAR_REF
  622. When a hist trigger makes use of a variable, a new hist_field is
  623. created with flag HIST_FIELD_FL_VAR_REF. For a VAR_REF field, the
  624. var.idx and var.hist_data take the same values as the referenced
  625. variable, as well as the referenced variable's size, type, and
  626. is_signed values. The VAR_REF field's .name is set to the name of the
  627. variable it references. If a variable reference was created using the
  628. explicit system.event.$var_ref notation, the hist_field's system and
  629. event_name variables are also set.
  630. So, in order to handle an event for the sched_switch histogram,
  631. because we have a reference to a variable on another histogram, we
  632. need to resolve all variable references first. This is done via the
  633. resolve_var_refs() calls made from event_hist_trigger(). What this
  634. does is grabs the var_refs[] array from the hist_data representing the
  635. sched_switch histogram. For each one of those, the referenced
  636. variable's var.hist_data along with the current key is used to look up
  637. the corresponding tracing_map_elt in that histogram. Once found, the
  638. referenced variable's var.idx is used to look up the variable's value
  639. using tracing_map_read_var(elt, var.idx), which yields the value of
  640. the variable for that element, ts0 in the case above. Note that both
  641. the hist_fields representing both the variable and the variable
  642. reference have the same var.idx, so this is straightforward.
  643. Variable and variable reference test
  644. ------------------------------------
  645. This example creates a variable on the sched_waking event, ts0, and
  646. uses it in the sched_switch trigger. The sched_switch trigger also
  647. creates its own variable, wakeup_lat, but nothing yet uses it::
  648. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  649. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >> events/sched/sched_switch/trigger
  650. Looking at the sched_waking 'hist_debug' output, in addition to the
  651. normal key and value hist_fields, in the val fields section we see a
  652. field with the HIST_FIELD_FL_VAR flag, which indicates that that field
  653. represents a variable. Note that in addition to the variable name,
  654. contained in the var.name field, it includes the var.idx, which is the
  655. index into the tracing_map_elt.vars[] array of the actual variable
  656. location. Note also that the output shows that variables live in the
  657. same part of the hist_data->fields[] array as normal values::
  658. # cat events/sched/sched_waking/hist_debug
  659. # event histogram
  660. #
  661. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  662. #
  663. hist_data: 000000009536f554
  664. n_vals: 2
  665. n_keys: 1
  666. n_fields: 3
  667. val fields:
  668. hist_data->fields[0]:
  669. flags:
  670. VAL: HIST_FIELD_FL_HITCOUNT
  671. type: u64
  672. size: 8
  673. is_signed: 0
  674. hist_data->fields[1]:
  675. flags:
  676. HIST_FIELD_FL_VAR
  677. var.name: ts0
  678. var.idx (into tracing_map_elt.vars[]): 0
  679. type: u64
  680. size: 8
  681. is_signed: 0
  682. key fields:
  683. hist_data->fields[2]:
  684. flags:
  685. HIST_FIELD_FL_KEY
  686. ftrace_event_field name: pid
  687. type: pid_t
  688. size: 8
  689. is_signed: 1
  690. Moving on to the sched_switch trigger hist_debug output, in addition
  691. to the unused wakeup_lat variable, we see a new section displaying
  692. variable references. Variable references are displayed in a separate
  693. section because in addition to being logically separate from
  694. variables and values, they actually live in a separate hist_data
  695. array, var_refs[].
  696. In this example, the sched_switch trigger has a reference to a
  697. variable on the sched_waking trigger, $ts0. Looking at the details,
  698. we can see that the var.hist_data value of the referenced variable
  699. matches the previously displayed sched_waking trigger, and the var.idx
  700. value matches the previously displayed var.idx value for that
  701. variable. Also displayed is the var_ref_idx value for that variable
  702. reference, which is where the value for that variable is cached for
  703. use when the trigger is invoked::
  704. # cat events/sched/sched_switch/hist_debug
  705. # event histogram
  706. #
  707. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global [active]
  708. #
  709. hist_data: 00000000f4ee8006
  710. n_vals: 2
  711. n_keys: 1
  712. n_fields: 3
  713. val fields:
  714. hist_data->fields[0]:
  715. flags:
  716. VAL: HIST_FIELD_FL_HITCOUNT
  717. type: u64
  718. size: 8
  719. is_signed: 0
  720. hist_data->fields[1]:
  721. flags:
  722. HIST_FIELD_FL_VAR
  723. var.name: wakeup_lat
  724. var.idx (into tracing_map_elt.vars[]): 0
  725. type: u64
  726. size: 0
  727. is_signed: 0
  728. key fields:
  729. hist_data->fields[2]:
  730. flags:
  731. HIST_FIELD_FL_KEY
  732. ftrace_event_field name: next_pid
  733. type: pid_t
  734. size: 8
  735. is_signed: 1
  736. variable reference fields:
  737. hist_data->var_refs[0]:
  738. flags:
  739. HIST_FIELD_FL_VAR_REF
  740. name: ts0
  741. var.idx (into tracing_map_elt.vars[]): 0
  742. var.hist_data: 000000009536f554
  743. var_ref_idx (into hist_data->var_refs[]): 0
  744. type: u64
  745. size: 8
  746. is_signed: 0
  747. The commands below can be used to clean things up for the next test::
  748. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' >> events/sched/sched_switch/trigger
  749. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  750. Actions and Handlers
  751. ====================
  752. Adding onto the previous example, we will now do something with that
  753. wakeup_lat variable, namely send it and another field as a synthetic
  754. event.
  755. The onmatch() action below basically says that whenever we have a
  756. sched_switch event, if we have a matching sched_waking event, in this
  757. case if we have a pid in the sched_waking histogram that matches the
  758. next_pid field on this sched_switch event, we retrieve the
  759. variables specified in the wakeup_latency() trace action, and use
  760. them to generate a new wakeup_latency event into the trace stream.
  761. Note that the way the trace handlers such as wakeup_latency() (which
  762. could equivalently be written trace(wakeup_latency,$wakeup_lat,next_pid)
  763. are implemented, the parameters specified to the trace handler must be
  764. variables. In this case, $wakeup_lat is obviously a variable, but
  765. next_pid isn't, since it's just naming a field in the sched_switch
  766. trace event. Since this is something that almost every trace() and
  767. save() action does, a special shortcut is implemented to allow field
  768. names to be used directly in those cases. How it works is that under
  769. the covers, a temporary variable is created for the named field, and
  770. this variable is what is actually passed to the trace handler. In the
  771. code and documentation, this type of variable is called a 'field
  772. variable'.
  773. Fields on other trace event's histograms can be used as well. In that
  774. case we have to generate a new histogram and an unfortunately named
  775. 'synthetic_field' (the use of synthetic here has nothing to do with
  776. synthetic events) and use that special histogram field as a variable.
  777. The diagram below illustrates the new elements described above in the
  778. context of the sched_switch histogram using the onmatch() handler and
  779. the trace() action.
  780. First, we define the wakeup_latency synthetic event::
  781. # echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
  782. Next, the sched_waking hist trigger as before::
  783. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >>
  784. events/sched/sched_waking/trigger
  785. Finally, we create a hist trigger on the sched_switch event that
  786. generates a wakeup_latency() trace event. In this case we pass
  787. next_pid into the wakeup_latency synthetic event invocation, which
  788. means it will be automatically converted into a field variable::
  789. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  790. onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid)' >>
  791. /sys/kernel/tracing/events/sched/sched_switch/trigger
  792. The diagram for the sched_switch event is similar to previous examples
  793. but shows the additional field_vars[] array for hist_data and shows
  794. the linkages between the field_vars and the variables and references
  795. created to implement the field variables. The details are discussed
  796. below::
  797. +------------------+
  798. | hist_data |
  799. +------------------+ +-----------------------+
  800. | .fields[] |-->| val = hitcount |
  801. +----------------+ +-----------------------+
  802. | .map | | .size |
  803. +----------------+ +---------------------+
  804. +---| .field_vars[] | | .offset |
  805. | +----------------+ +---------------------+
  806. |+--| .var_refs[] | | .offset |
  807. || +----------------+ +---------------------+
  808. || | .fn() |
  809. || var_ref_vals[] +---------------------+
  810. || +-------------+ | .flags |
  811. || | $ts0 |<---+ +---------------------+
  812. || +-------------+ | | .var.idx |
  813. || | $next_pid |<-+ | +---------------------+
  814. || +-------------+ | | | .var.hist_data |
  815. ||+>| $wakeup_lat | | | +---------------------+
  816. ||| +-------------+ | | | .var_ref_idx |
  817. ||| | | | | +-----------------------+
  818. ||| +-------------+ | | | var = wakeup_lat |
  819. ||| . | | +-----------------------+
  820. ||| . | | | .size |
  821. ||| . | | +---------------------+
  822. ||| +-------------+ | | | .offset |
  823. ||| | | | | +---------------------+
  824. ||| +-------------+ | | | .fn() |
  825. ||| | | | | +---------------------+
  826. ||| +-------------+ | | | .flags & FL_VAR |
  827. ||| | | +---------------------+
  828. ||| | | | .var.idx |
  829. ||| | | +---------------------+
  830. ||| | | | .var.hist_data |
  831. ||| | | +---------------------+
  832. ||| | | | .var_ref_idx |
  833. ||| | | +---------------------+
  834. ||| | | .
  835. ||| | | .
  836. ||| | | .
  837. ||| | | .
  838. ||| +--------------+ | | .
  839. +-->| field_var | | | .
  840. || +--------------+ | | .
  841. || | var | | | .
  842. || +------------+ | | .
  843. || | val | | | .
  844. || +--------------+ | | .
  845. || | field_var | | | .
  846. || +--------------+ | | .
  847. || | var | | | .
  848. || +------------+ | | .
  849. || | val | | | .
  850. || +------------+ | | .
  851. || . | | .
  852. || . | | .
  853. || . | | +-----------------------+ <--- n_vals
  854. || +--------------+ | | | key = pid |
  855. || | field_var | | | +-----------------------+
  856. || +--------------+ | | | .size |
  857. || | var |--+| +---------------------+
  858. || +------------+ ||| | .offset |
  859. || | val |-+|| +---------------------+
  860. || +------------+ ||| | .fn() |
  861. || ||| +---------------------+
  862. || ||| | .flags |
  863. || ||| +---------------------+
  864. || ||| | .var.idx |
  865. || ||| +---------------------+ <--- n_fields
  866. || |||
  867. || ||| n_keys = n_fields - n_vals
  868. || ||| +-----------------------+
  869. || |+->| var = next_pid |
  870. || | | +-----------------------+
  871. || | | | .size |
  872. || | | +---------------------+
  873. || | | | .offset |
  874. || | | +---------------------+
  875. || | | | .flags & FL_VAR |
  876. || | | +---------------------+
  877. || | | | .var.idx |
  878. || | | +---------------------+
  879. || | | | .var.hist_data |
  880. || | | +-----------------------+
  881. || +-->| val for next_pid |
  882. || | | +-----------------------+
  883. || | | | .size |
  884. || | | +---------------------+
  885. || | | | .offset |
  886. || | | +---------------------+
  887. || | | | .fn() |
  888. || | | +---------------------+
  889. || | | | .flags |
  890. || | | +---------------------+
  891. || | | | |
  892. || | | +---------------------+
  893. || | |
  894. || | |
  895. || | | +-----------------------+
  896. +|------------------|-|>| var_ref = $ts0 |
  897. | | | +-----------------------+
  898. | | | | .size |
  899. | | | +---------------------+
  900. | | | | .offset |
  901. | | | +---------------------+
  902. | | | | .fn() |
  903. | | | +---------------------+
  904. | | | | .flags & FL_VAR_REF |
  905. | | | +---------------------+
  906. | | +---| .var_ref_idx |
  907. | | +-----------------------+
  908. | | | var_ref = $next_pid |
  909. | | +-----------------------+
  910. | | | .size |
  911. | | +---------------------+
  912. | | | .offset |
  913. | | +---------------------+
  914. | | | .fn() |
  915. | | +---------------------+
  916. | | | .flags & FL_VAR_REF |
  917. | | +---------------------+
  918. | +-----| .var_ref_idx |
  919. | +-----------------------+
  920. | | var_ref = $wakeup_lat |
  921. | +-----------------------+
  922. | | .size |
  923. | +---------------------+
  924. | | .offset |
  925. | +---------------------+
  926. | | .fn() |
  927. | +---------------------+
  928. | | .flags & FL_VAR_REF |
  929. | +---------------------+
  930. +------------------------| .var_ref_idx |
  931. +---------------------+
  932. As you can see, for a field variable, two hist_fields are created: one
  933. representing the variable, in this case next_pid, and one to actually
  934. get the value of the field from the trace stream, like a normal val
  935. field does. These are created separately from normal variable
  936. creation and are saved in the hist_data->field_vars[] array. See
  937. below for how these are used. In addition, a reference hist_field is
  938. also created, which is needed to reference the field variables such as
  939. $next_pid variable in the trace() action.
  940. Note that $wakeup_lat is also a variable reference, referencing the
  941. value of the expression common_timestamp-$ts0, and so also needs to
  942. have a hist field entry representing that reference created.
  943. When hist_trigger_elt_update() is called to get the normal key and
  944. value fields, it also calls update_field_vars(), which goes through
  945. each field_var created for the histogram, and available from
  946. hist_data->field_vars and calls val->fn() to get the data from the
  947. current trace record, and then uses the var's var.idx to set the
  948. variable at the var.idx offset in the appropriate tracing_map_elt's
  949. variable at elt->vars[var.idx].
  950. Once all the variables have been updated, resolve_var_refs() can be
  951. called from event_hist_trigger(), and not only can our $ts0 and
  952. $next_pid references be resolved but the $wakeup_lat reference as
  953. well. At this point, the trace() action can simply access the values
  954. assembled in the var_ref_vals[] array and generate the trace event.
  955. The same process occurs for the field variables associated with the
  956. save() action.
  957. Abbreviations used in the diagram::
  958. hist_data = struct hist_trigger_data
  959. hist_data.fields = struct hist_field
  960. field_var = struct field_var
  961. fn = hist_field_fn_t
  962. FL_KEY = HIST_FIELD_FL_KEY
  963. FL_VAR = HIST_FIELD_FL_VAR
  964. FL_VAR_REF = HIST_FIELD_FL_VAR_REF
  965. trace() action field variable test
  966. ----------------------------------
  967. This example adds to the previous test example by finally making use
  968. of the wakeup_lat variable, but in addition also creates a couple of
  969. field variables that then are all passed to the wakeup_latency() trace
  970. action via the onmatch() handler.
  971. First, we create the wakeup_latency synthetic event::
  972. # echo 'wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  973. Next, the sched_waking trigger from previous examples::
  974. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  975. Finally, as in the previous test example, we calculate and assign the
  976. wakeup latency using the $ts0 reference from the sched_waking trigger
  977. to the wakeup_lat variable, and finally use it along with a couple
  978. sched_switch event fields, next_pid and next_comm, to generate a
  979. wakeup_latency trace event. The next_pid and next_comm event fields
  980. are automatically converted into field variables for this purpose::
  981. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm)' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
  982. The sched_waking hist_debug output shows the same data as in the
  983. previous test example::
  984. # cat events/sched/sched_waking/hist_debug
  985. # event histogram
  986. #
  987. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  988. #
  989. hist_data: 00000000d60ff61f
  990. n_vals: 2
  991. n_keys: 1
  992. n_fields: 3
  993. val fields:
  994. hist_data->fields[0]:
  995. flags:
  996. VAL: HIST_FIELD_FL_HITCOUNT
  997. type: u64
  998. size: 8
  999. is_signed: 0
  1000. hist_data->fields[1]:
  1001. flags:
  1002. HIST_FIELD_FL_VAR
  1003. var.name: ts0
  1004. var.idx (into tracing_map_elt.vars[]): 0
  1005. type: u64
  1006. size: 8
  1007. is_signed: 0
  1008. key fields:
  1009. hist_data->fields[2]:
  1010. flags:
  1011. HIST_FIELD_FL_KEY
  1012. ftrace_event_field name: pid
  1013. type: pid_t
  1014. size: 8
  1015. is_signed: 1
  1016. The sched_switch hist_debug output shows the same key and value fields
  1017. as in the previous test example - note that wakeup_lat is still in the
  1018. val fields section, but that the new field variables are not there -
  1019. although the field variables are variables, they're held separately in
  1020. the hist_data's field_vars[] array. Although the field variables and
  1021. the normal variables are located in separate places, you can see that
  1022. the actual variable locations for those variables in the
  1023. tracing_map_elt.vars[] do have increasing indices as expected:
  1024. wakeup_lat takes the var.idx = 0 slot, while the field variables for
  1025. next_pid and next_comm have values var.idx = 1, and var.idx = 2. Note
  1026. also that those are the same values displayed for the variable
  1027. references corresponding to those variables in the variable reference
  1028. fields section. Since there are two triggers and thus two hist_data
  1029. addresses, those addresses also need to be accounted for when doing
  1030. the matching - you can see that the first variable refers to the 0
  1031. var.idx on the previous hist trigger (see the hist_data address
  1032. associated with that trigger), while the second variable refers to the
  1033. 0 var.idx on the sched_switch hist trigger, as do all the remaining
  1034. variable references.
  1035. Finally, the action tracking variables section just shows the system
  1036. and event name for the onmatch() handler::
  1037. # cat events/sched/sched_switch/hist_debug
  1038. # event histogram
  1039. #
  1040. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm) [active]
  1041. #
  1042. hist_data: 0000000008f551b7
  1043. n_vals: 2
  1044. n_keys: 1
  1045. n_fields: 3
  1046. val fields:
  1047. hist_data->fields[0]:
  1048. flags:
  1049. VAL: HIST_FIELD_FL_HITCOUNT
  1050. type: u64
  1051. size: 8
  1052. is_signed: 0
  1053. hist_data->fields[1]:
  1054. flags:
  1055. HIST_FIELD_FL_VAR
  1056. var.name: wakeup_lat
  1057. var.idx (into tracing_map_elt.vars[]): 0
  1058. type: u64
  1059. size: 0
  1060. is_signed: 0
  1061. key fields:
  1062. hist_data->fields[2]:
  1063. flags:
  1064. HIST_FIELD_FL_KEY
  1065. ftrace_event_field name: next_pid
  1066. type: pid_t
  1067. size: 8
  1068. is_signed: 1
  1069. variable reference fields:
  1070. hist_data->var_refs[0]:
  1071. flags:
  1072. HIST_FIELD_FL_VAR_REF
  1073. name: ts0
  1074. var.idx (into tracing_map_elt.vars[]): 0
  1075. var.hist_data: 00000000d60ff61f
  1076. var_ref_idx (into hist_data->var_refs[]): 0
  1077. type: u64
  1078. size: 8
  1079. is_signed: 0
  1080. hist_data->var_refs[1]:
  1081. flags:
  1082. HIST_FIELD_FL_VAR_REF
  1083. name: wakeup_lat
  1084. var.idx (into tracing_map_elt.vars[]): 0
  1085. var.hist_data: 0000000008f551b7
  1086. var_ref_idx (into hist_data->var_refs[]): 1
  1087. type: u64
  1088. size: 0
  1089. is_signed: 0
  1090. hist_data->var_refs[2]:
  1091. flags:
  1092. HIST_FIELD_FL_VAR_REF
  1093. name: next_pid
  1094. var.idx (into tracing_map_elt.vars[]): 1
  1095. var.hist_data: 0000000008f551b7
  1096. var_ref_idx (into hist_data->var_refs[]): 2
  1097. type: pid_t
  1098. size: 4
  1099. is_signed: 0
  1100. hist_data->var_refs[3]:
  1101. flags:
  1102. HIST_FIELD_FL_VAR_REF
  1103. name: next_comm
  1104. var.idx (into tracing_map_elt.vars[]): 2
  1105. var.hist_data: 0000000008f551b7
  1106. var_ref_idx (into hist_data->var_refs[]): 3
  1107. type: char[16]
  1108. size: 256
  1109. is_signed: 0
  1110. field variables:
  1111. hist_data->field_vars[0]:
  1112. field_vars[0].var:
  1113. flags:
  1114. HIST_FIELD_FL_VAR
  1115. var.name: next_pid
  1116. var.idx (into tracing_map_elt.vars[]): 1
  1117. field_vars[0].val:
  1118. ftrace_event_field name: next_pid
  1119. type: pid_t
  1120. size: 4
  1121. is_signed: 1
  1122. hist_data->field_vars[1]:
  1123. field_vars[1].var:
  1124. flags:
  1125. HIST_FIELD_FL_VAR
  1126. var.name: next_comm
  1127. var.idx (into tracing_map_elt.vars[]): 2
  1128. field_vars[1].val:
  1129. ftrace_event_field name: next_comm
  1130. type: char[16]
  1131. size: 256
  1132. is_signed: 0
  1133. action tracking variables (for onmax()/onchange()/onmatch()):
  1134. hist_data->actions[0].match_data.event_system: sched
  1135. hist_data->actions[0].match_data.event: sched_waking
  1136. The commands below can be used to clean things up for the next test::
  1137. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,next_comm)' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
  1138. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1139. # echo '!wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  1140. action_data and the trace() action
  1141. ----------------------------------
  1142. As mentioned above, when the trace() action generates a synthetic
  1143. event, all the parameters to the synthetic event either already are
  1144. variables or are converted into variables (via field variables), and
  1145. finally all those variable values are collected via references to them
  1146. into a var_ref_vals[] array.
  1147. The values in the var_ref_vals[] array, however, don't necessarily
  1148. follow the same ordering as the synthetic event params. To address
  1149. that, struct action_data contains another array, var_ref_idx[] that
  1150. maps the trace action params to the var_ref_vals[] values. Below is a
  1151. diagram illustrating that for the wakeup_latency() synthetic event::
  1152. +------------------+ wakeup_latency()
  1153. | action_data | event params var_ref_vals[]
  1154. +------------------+ +-----------------+ +-----------------+
  1155. | .var_ref_idx[] |--->| $wakeup_lat idx |---+ | |
  1156. +----------------+ +-----------------+ | +-----------------+
  1157. | .synth_event | | $next_pid idx |---|-+ | $wakeup_lat val |
  1158. +----------------+ +-----------------+ | | +-----------------+
  1159. . | +->| $next_pid val |
  1160. . | +-----------------+
  1161. . | .
  1162. +-----------------+ | .
  1163. | | | .
  1164. +-----------------+ | +-----------------+
  1165. +--->| $wakeup_lat val |
  1166. +-----------------+
  1167. Basically, how this ends up getting used in the synthetic event probe
  1168. function, trace_event_raw_event_synth(), is as follows::
  1169. for each field i in .synth_event
  1170. val_idx = .var_ref_idx[i]
  1171. val = var_ref_vals[val_idx]
  1172. action_data and the onXXX() handlers
  1173. ------------------------------------
  1174. The hist trigger onXXX() actions other than onmatch(), such as onmax()
  1175. and onchange(), also make use of and internally create hidden
  1176. variables. This information is contained in the
  1177. action_data.track_data struct, and is also visible in the hist_debug
  1178. output as will be described in the example below.
  1179. Typically, the onmax() or onchange() handlers are used in conjunction
  1180. with the save() and snapshot() actions. For example::
  1181. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  1182. onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >>
  1183. /sys/kernel/tracing/events/sched/sched_switch/trigger
  1184. or::
  1185. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \
  1186. onmax($wakeup_lat).snapshot()' >>
  1187. /sys/kernel/tracing/events/sched/sched_switch/trigger
  1188. save() action field variable test
  1189. ---------------------------------
  1190. For this example, instead of generating a synthetic event, the save()
  1191. action is used to save field values whenever an onmax() handler
  1192. detects that a new max latency has been hit. As in the previous
  1193. example, the values being saved are also field values, but in this
  1194. case, are kept in a separate hist_data array named save_vars[].
  1195. As in previous test examples, we set up the sched_waking trigger::
  1196. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1197. In this case, however, we set up the sched_switch trigger to save some
  1198. sched_switch field values whenever we hit a new maximum latency. For
  1199. both the onmax() handler and save() action, variables will be created,
  1200. which we can use the hist_debug files to examine::
  1201. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >> events/sched/sched_switch/trigger
  1202. The sched_waking hist_debug output shows the same data as in the
  1203. previous test examples::
  1204. # cat events/sched/sched_waking/hist_debug
  1205. #
  1206. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1207. #
  1208. hist_data: 00000000e6290f48
  1209. n_vals: 2
  1210. n_keys: 1
  1211. n_fields: 3
  1212. val fields:
  1213. hist_data->fields[0]:
  1214. flags:
  1215. VAL: HIST_FIELD_FL_HITCOUNT
  1216. type: u64
  1217. size: 8
  1218. is_signed: 0
  1219. hist_data->fields[1]:
  1220. flags:
  1221. HIST_FIELD_FL_VAR
  1222. var.name: ts0
  1223. var.idx (into tracing_map_elt.vars[]): 0
  1224. type: u64
  1225. size: 8
  1226. is_signed: 0
  1227. key fields:
  1228. hist_data->fields[2]:
  1229. flags:
  1230. HIST_FIELD_FL_KEY
  1231. ftrace_event_field name: pid
  1232. type: pid_t
  1233. size: 8
  1234. is_signed: 1
  1235. The output of the sched_switch trigger shows the same val and key
  1236. values as before, but also shows a couple new sections.
  1237. First, the action tracking variables section now shows the
  1238. actions[].track_data information describing the special tracking
  1239. variables and references used to track, in this case, the running
  1240. maximum value. The actions[].track_data.var_ref member contains the
  1241. reference to the variable being tracked, in this case the $wakeup_lat
  1242. variable. In order to perform the onmax() handler function, there
  1243. also needs to be a variable that tracks the current maximum by getting
  1244. updated whenever a new maximum is hit. In this case, we can see that
  1245. an auto-generated variable named ' __max' has been created and is
  1246. visible in the actions[].track_data.track_var variable.
  1247. Finally, in the new 'save action variables' section, we can see that
  1248. the 4 params to the save() function have resulted in 4 field variables
  1249. being created for the purposes of saving the values of the named
  1250. fields when the max is hit. These variables are kept in a separate
  1251. save_vars[] array off of hist_data, so are displayed in a separate
  1252. section::
  1253. # cat events/sched/sched_switch/hist_debug
  1254. # event histogram
  1255. #
  1256. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) [active]
  1257. #
  1258. hist_data: 0000000057bcd28d
  1259. n_vals: 2
  1260. n_keys: 1
  1261. n_fields: 3
  1262. val fields:
  1263. hist_data->fields[0]:
  1264. flags:
  1265. VAL: HIST_FIELD_FL_HITCOUNT
  1266. type: u64
  1267. size: 8
  1268. is_signed: 0
  1269. hist_data->fields[1]:
  1270. flags:
  1271. HIST_FIELD_FL_VAR
  1272. var.name: wakeup_lat
  1273. var.idx (into tracing_map_elt.vars[]): 0
  1274. type: u64
  1275. size: 0
  1276. is_signed: 0
  1277. key fields:
  1278. hist_data->fields[2]:
  1279. flags:
  1280. HIST_FIELD_FL_KEY
  1281. ftrace_event_field name: next_pid
  1282. type: pid_t
  1283. size: 8
  1284. is_signed: 1
  1285. variable reference fields:
  1286. hist_data->var_refs[0]:
  1287. flags:
  1288. HIST_FIELD_FL_VAR_REF
  1289. name: ts0
  1290. var.idx (into tracing_map_elt.vars[]): 0
  1291. var.hist_data: 00000000e6290f48
  1292. var_ref_idx (into hist_data->var_refs[]): 0
  1293. type: u64
  1294. size: 8
  1295. is_signed: 0
  1296. hist_data->var_refs[1]:
  1297. flags:
  1298. HIST_FIELD_FL_VAR_REF
  1299. name: wakeup_lat
  1300. var.idx (into tracing_map_elt.vars[]): 0
  1301. var.hist_data: 0000000057bcd28d
  1302. var_ref_idx (into hist_data->var_refs[]): 1
  1303. type: u64
  1304. size: 0
  1305. is_signed: 0
  1306. action tracking variables (for onmax()/onchange()/onmatch()):
  1307. hist_data->actions[0].track_data.var_ref:
  1308. flags:
  1309. HIST_FIELD_FL_VAR_REF
  1310. name: wakeup_lat
  1311. var.idx (into tracing_map_elt.vars[]): 0
  1312. var.hist_data: 0000000057bcd28d
  1313. var_ref_idx (into hist_data->var_refs[]): 1
  1314. type: u64
  1315. size: 0
  1316. is_signed: 0
  1317. hist_data->actions[0].track_data.track_var:
  1318. flags:
  1319. HIST_FIELD_FL_VAR
  1320. var.name: __max
  1321. var.idx (into tracing_map_elt.vars[]): 1
  1322. type: u64
  1323. size: 8
  1324. is_signed: 0
  1325. save action variables (save() params):
  1326. hist_data->save_vars[0]:
  1327. save_vars[0].var:
  1328. flags:
  1329. HIST_FIELD_FL_VAR
  1330. var.name: next_comm
  1331. var.idx (into tracing_map_elt.vars[]): 2
  1332. save_vars[0].val:
  1333. ftrace_event_field name: next_comm
  1334. type: char[16]
  1335. size: 256
  1336. is_signed: 0
  1337. hist_data->save_vars[1]:
  1338. save_vars[1].var:
  1339. flags:
  1340. HIST_FIELD_FL_VAR
  1341. var.name: prev_pid
  1342. var.idx (into tracing_map_elt.vars[]): 3
  1343. save_vars[1].val:
  1344. ftrace_event_field name: prev_pid
  1345. type: pid_t
  1346. size: 4
  1347. is_signed: 1
  1348. hist_data->save_vars[2]:
  1349. save_vars[2].var:
  1350. flags:
  1351. HIST_FIELD_FL_VAR
  1352. var.name: prev_prio
  1353. var.idx (into tracing_map_elt.vars[]): 4
  1354. save_vars[2].val:
  1355. ftrace_event_field name: prev_prio
  1356. type: int
  1357. size: 4
  1358. is_signed: 1
  1359. hist_data->save_vars[3]:
  1360. save_vars[3].var:
  1361. flags:
  1362. HIST_FIELD_FL_VAR
  1363. var.name: prev_comm
  1364. var.idx (into tracing_map_elt.vars[]): 5
  1365. save_vars[3].val:
  1366. ftrace_event_field name: prev_comm
  1367. type: char[16]
  1368. size: 256
  1369. is_signed: 0
  1370. The commands below can be used to clean things up for the next test::
  1371. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)' >> events/sched/sched_switch/trigger
  1372. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1373. A couple special cases
  1374. ======================
  1375. While the above covers the basics of the histogram internals, there
  1376. are a couple of special cases that should be discussed, since they
  1377. tend to create even more confusion. Those are field variables on other
  1378. histograms, and aliases, both described below through example tests
  1379. using the hist_debug files.
  1380. Test of field variables on other histograms
  1381. -------------------------------------------
  1382. This example is similar to the previous examples, but in this case,
  1383. the sched_switch trigger references a hist trigger field on another
  1384. event, namely the sched_waking event. In order to accomplish this, a
  1385. field variable is created for the other event, but since an existing
  1386. histogram can't be used, as existing histograms are immutable, a new
  1387. histogram with a matching variable is created and used, and we'll see
  1388. that reflected in the hist_debug output shown below.
  1389. First, we create the wakeup_latency synthetic event. Note the
  1390. addition of the prio field::
  1391. # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> synthetic_events
  1392. As in previous test examples, we set up the sched_waking trigger::
  1393. # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1394. Here we set up a hist trigger on sched_switch to send a wakeup_latency
  1395. event using an onmatch handler naming the sched_waking event. Note
  1396. that the third param being passed to the wakeup_latency() is prio,
  1397. which is a field name that needs to have a field variable created for
  1398. it. There isn't however any prio field on the sched_switch event so
  1399. it would seem that it wouldn't be possible to create a field variable
  1400. for it. The matching sched_waking event does have a prio field, so it
  1401. should be possible to make use of it for this purpose. The problem
  1402. with that is that it's not currently possible to define a new variable
  1403. on an existing histogram, so it's not possible to add a new prio field
  1404. variable to the existing sched_waking histogram. It is however
  1405. possible to create an additional new 'matching' sched_waking histogram
  1406. for the same event, meaning that it uses the same key and filters, and
  1407. define the new prio field variable on that.
  1408. Here's the sched_switch trigger::
  1409. # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio)' >> events/sched/sched_switch/trigger
  1410. And here's the output of the hist_debug information for the
  1411. sched_waking hist trigger. Note that there are two histograms
  1412. displayed in the output: the first is the normal sched_waking
  1413. histogram we've seen in the previous examples, and the second is the
  1414. special histogram we created to provide the prio field variable.
  1415. Looking at the second histogram below, we see a variable with the name
  1416. synthetic_prio. This is the field variable created for the prio field
  1417. on that sched_waking histogram::
  1418. # cat events/sched/sched_waking/hist_debug
  1419. # event histogram
  1420. #
  1421. # trigger info: hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1422. #
  1423. hist_data: 00000000349570e4
  1424. n_vals: 2
  1425. n_keys: 1
  1426. n_fields: 3
  1427. val fields:
  1428. hist_data->fields[0]:
  1429. flags:
  1430. VAL: HIST_FIELD_FL_HITCOUNT
  1431. type: u64
  1432. size: 8
  1433. is_signed: 0
  1434. hist_data->fields[1]:
  1435. flags:
  1436. HIST_FIELD_FL_VAR
  1437. var.name: ts0
  1438. var.idx (into tracing_map_elt.vars[]): 0
  1439. type: u64
  1440. size: 8
  1441. is_signed: 0
  1442. key fields:
  1443. hist_data->fields[2]:
  1444. flags:
  1445. HIST_FIELD_FL_KEY
  1446. ftrace_event_field name: pid
  1447. type: pid_t
  1448. size: 8
  1449. is_signed: 1
  1450. # event histogram
  1451. #
  1452. # trigger info: hist:keys=pid:vals=hitcount:synthetic_prio=prio:sort=hitcount:size=2048 [active]
  1453. #
  1454. hist_data: 000000006920cf38
  1455. n_vals: 2
  1456. n_keys: 1
  1457. n_fields: 3
  1458. val fields:
  1459. hist_data->fields[0]:
  1460. flags:
  1461. VAL: HIST_FIELD_FL_HITCOUNT
  1462. type: u64
  1463. size: 8
  1464. is_signed: 0
  1465. hist_data->fields[1]:
  1466. flags:
  1467. HIST_FIELD_FL_VAR
  1468. ftrace_event_field name: prio
  1469. var.name: synthetic_prio
  1470. var.idx (into tracing_map_elt.vars[]): 0
  1471. type: int
  1472. size: 4
  1473. is_signed: 1
  1474. key fields:
  1475. hist_data->fields[2]:
  1476. flags:
  1477. HIST_FIELD_FL_KEY
  1478. ftrace_event_field name: pid
  1479. type: pid_t
  1480. size: 8
  1481. is_signed: 1
  1482. Looking at the sched_switch histogram below, we can see a reference to
  1483. the synthetic_prio variable on sched_waking, and looking at the
  1484. associated hist_data address we see that it is indeed associated with
  1485. the new histogram. Note also that the other references are to a
  1486. normal variable, wakeup_lat, and to a normal field variable, next_pid,
  1487. the details of which are in the field variables section::
  1488. # cat events/sched/sched_switch/hist_debug
  1489. # event histogram
  1490. #
  1491. # trigger info: hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio) [active]
  1492. #
  1493. hist_data: 00000000a73b67df
  1494. n_vals: 2
  1495. n_keys: 1
  1496. n_fields: 3
  1497. val fields:
  1498. hist_data->fields[0]:
  1499. flags:
  1500. VAL: HIST_FIELD_FL_HITCOUNT
  1501. type: u64
  1502. size: 8
  1503. is_signed: 0
  1504. hist_data->fields[1]:
  1505. flags:
  1506. HIST_FIELD_FL_VAR
  1507. var.name: wakeup_lat
  1508. var.idx (into tracing_map_elt.vars[]): 0
  1509. type: u64
  1510. size: 0
  1511. is_signed: 0
  1512. key fields:
  1513. hist_data->fields[2]:
  1514. flags:
  1515. HIST_FIELD_FL_KEY
  1516. ftrace_event_field name: next_pid
  1517. type: pid_t
  1518. size: 8
  1519. is_signed: 1
  1520. variable reference fields:
  1521. hist_data->var_refs[0]:
  1522. flags:
  1523. HIST_FIELD_FL_VAR_REF
  1524. name: ts0
  1525. var.idx (into tracing_map_elt.vars[]): 0
  1526. var.hist_data: 00000000349570e4
  1527. var_ref_idx (into hist_data->var_refs[]): 0
  1528. type: u64
  1529. size: 8
  1530. is_signed: 0
  1531. hist_data->var_refs[1]:
  1532. flags:
  1533. HIST_FIELD_FL_VAR_REF
  1534. name: wakeup_lat
  1535. var.idx (into tracing_map_elt.vars[]): 0
  1536. var.hist_data: 00000000a73b67df
  1537. var_ref_idx (into hist_data->var_refs[]): 1
  1538. type: u64
  1539. size: 0
  1540. is_signed: 0
  1541. hist_data->var_refs[2]:
  1542. flags:
  1543. HIST_FIELD_FL_VAR_REF
  1544. name: next_pid
  1545. var.idx (into tracing_map_elt.vars[]): 1
  1546. var.hist_data: 00000000a73b67df
  1547. var_ref_idx (into hist_data->var_refs[]): 2
  1548. type: pid_t
  1549. size: 4
  1550. is_signed: 0
  1551. hist_data->var_refs[3]:
  1552. flags:
  1553. HIST_FIELD_FL_VAR_REF
  1554. name: synthetic_prio
  1555. var.idx (into tracing_map_elt.vars[]): 0
  1556. var.hist_data: 000000006920cf38
  1557. var_ref_idx (into hist_data->var_refs[]): 3
  1558. type: int
  1559. size: 4
  1560. is_signed: 1
  1561. field variables:
  1562. hist_data->field_vars[0]:
  1563. field_vars[0].var:
  1564. flags:
  1565. HIST_FIELD_FL_VAR
  1566. var.name: next_pid
  1567. var.idx (into tracing_map_elt.vars[]): 1
  1568. field_vars[0].val:
  1569. ftrace_event_field name: next_pid
  1570. type: pid_t
  1571. size: 4
  1572. is_signed: 1
  1573. action tracking variables (for onmax()/onchange()/onmatch()):
  1574. hist_data->actions[0].match_data.event_system: sched
  1575. hist_data->actions[0].match_data.event: sched_waking
  1576. The commands below can be used to clean things up for the next test::
  1577. # echo '!hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio)' >> events/sched/sched_switch/trigger
  1578. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1579. # echo '!wakeup_latency u64 lat; pid_t pid; int prio' >> synthetic_events
  1580. Alias test
  1581. ----------
  1582. This example is very similar to previous examples, but demonstrates
  1583. the alias flag.
  1584. First, we create the wakeup_latency synthetic event::
  1585. # echo 'wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events
  1586. Next, we create a sched_waking trigger similar to previous examples,
  1587. but in this case we save the pid in the waking_pid variable::
  1588. # echo 'hist:keys=pid:waking_pid=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1589. For the sched_switch trigger, instead of using $waking_pid directly in
  1590. the wakeup_latency synthetic event invocation, we create an alias of
  1591. $waking_pid named $woken_pid, and use that in the synthetic event
  1592. invocation instead::
  1593. # echo 'hist:keys=next_pid:woken_pid=$waking_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm)' >> events/sched/sched_switch/trigger
  1594. Looking at the sched_waking hist_debug output, in addition to the
  1595. normal fields, we can see the waking_pid variable::
  1596. # cat events/sched/sched_waking/hist_debug
  1597. # event histogram
  1598. #
  1599. # trigger info: hist:keys=pid:vals=hitcount:waking_pid=pid,ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active]
  1600. #
  1601. hist_data: 00000000a250528c
  1602. n_vals: 3
  1603. n_keys: 1
  1604. n_fields: 4
  1605. val fields:
  1606. hist_data->fields[0]:
  1607. flags:
  1608. VAL: HIST_FIELD_FL_HITCOUNT
  1609. type: u64
  1610. size: 8
  1611. is_signed: 0
  1612. hist_data->fields[1]:
  1613. flags:
  1614. HIST_FIELD_FL_VAR
  1615. ftrace_event_field name: pid
  1616. var.name: waking_pid
  1617. var.idx (into tracing_map_elt.vars[]): 0
  1618. type: pid_t
  1619. size: 4
  1620. is_signed: 1
  1621. hist_data->fields[2]:
  1622. flags:
  1623. HIST_FIELD_FL_VAR
  1624. var.name: ts0
  1625. var.idx (into tracing_map_elt.vars[]): 1
  1626. type: u64
  1627. size: 8
  1628. is_signed: 0
  1629. key fields:
  1630. hist_data->fields[3]:
  1631. flags:
  1632. HIST_FIELD_FL_KEY
  1633. ftrace_event_field name: pid
  1634. type: pid_t
  1635. size: 8
  1636. is_signed: 1
  1637. The sched_switch hist_debug output shows that a variable named
  1638. woken_pid has been created but that it also has the
  1639. HIST_FIELD_FL_ALIAS flag set. It also has the HIST_FIELD_FL_VAR flag
  1640. set, which is why it appears in the val field section.
  1641. Despite that implementation detail, an alias variable is actually more
  1642. like a variable reference; in fact it can be thought of as a reference
  1643. to a reference. The implementation copies the var_ref->fn() from the
  1644. variable reference being referenced, in this case, the waking_pid
  1645. fn(), which is hist_field_var_ref() and makes that the fn() of the
  1646. alias. The hist_field_var_ref() fn() requires the var_ref_idx of the
  1647. variable reference it's using, so waking_pid's var_ref_idx is also
  1648. copied to the alias. The end result is that when the value of alias
  1649. is retrieved, in the end it just does the same thing the original
  1650. reference would have done and retrieves the same value from the
  1651. var_ref_vals[] array. You can verify this in the output by noting
  1652. that the var_ref_idx of the alias, in this case woken_pid, is the same
  1653. as the var_ref_idx of the reference, waking_pid, in the variable
  1654. reference fields section.
  1655. Additionally, once it gets that value, since it is also a variable, it
  1656. then saves that value into its var.idx. So the var.idx of the
  1657. woken_pid alias is 0, which it fills with the value from var_ref_idx 0
  1658. when its fn() is called to update itself. You'll also notice that
  1659. there's a woken_pid var_ref in the variable refs section. That is the
  1660. reference to the woken_pid alias variable, and you can see that it
  1661. retrieves the value from the same var.idx as the woken_pid alias, 0,
  1662. and then in turn saves that value in its own var_ref_idx slot, 3, and
  1663. the value at this position is finally what gets assigned to the
  1664. $woken_pid slot in the trace event invocation::
  1665. # cat events/sched/sched_switch/hist_debug
  1666. # event histogram
  1667. #
  1668. # trigger info: hist:keys=next_pid:vals=hitcount:woken_pid=$waking_pid,wakeup_lat=common_timestamp.usecs-$ts0:sort=hitcount:size=2048:clock=global:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm) [active]
  1669. #
  1670. hist_data: 0000000055d65ed0
  1671. n_vals: 3
  1672. n_keys: 1
  1673. n_fields: 4
  1674. val fields:
  1675. hist_data->fields[0]:
  1676. flags:
  1677. VAL: HIST_FIELD_FL_HITCOUNT
  1678. type: u64
  1679. size: 8
  1680. is_signed: 0
  1681. hist_data->fields[1]:
  1682. flags:
  1683. HIST_FIELD_FL_VAR
  1684. HIST_FIELD_FL_ALIAS
  1685. var.name: woken_pid
  1686. var.idx (into tracing_map_elt.vars[]): 0
  1687. var_ref_idx (into hist_data->var_refs[]): 0
  1688. type: pid_t
  1689. size: 4
  1690. is_signed: 1
  1691. hist_data->fields[2]:
  1692. flags:
  1693. HIST_FIELD_FL_VAR
  1694. var.name: wakeup_lat
  1695. var.idx (into tracing_map_elt.vars[]): 1
  1696. type: u64
  1697. size: 0
  1698. is_signed: 0
  1699. key fields:
  1700. hist_data->fields[3]:
  1701. flags:
  1702. HIST_FIELD_FL_KEY
  1703. ftrace_event_field name: next_pid
  1704. type: pid_t
  1705. size: 8
  1706. is_signed: 1
  1707. variable reference fields:
  1708. hist_data->var_refs[0]:
  1709. flags:
  1710. HIST_FIELD_FL_VAR_REF
  1711. name: waking_pid
  1712. var.idx (into tracing_map_elt.vars[]): 0
  1713. var.hist_data: 00000000a250528c
  1714. var_ref_idx (into hist_data->var_refs[]): 0
  1715. type: pid_t
  1716. size: 4
  1717. is_signed: 1
  1718. hist_data->var_refs[1]:
  1719. flags:
  1720. HIST_FIELD_FL_VAR_REF
  1721. name: ts0
  1722. var.idx (into tracing_map_elt.vars[]): 1
  1723. var.hist_data: 00000000a250528c
  1724. var_ref_idx (into hist_data->var_refs[]): 1
  1725. type: u64
  1726. size: 8
  1727. is_signed: 0
  1728. hist_data->var_refs[2]:
  1729. flags:
  1730. HIST_FIELD_FL_VAR_REF
  1731. name: wakeup_lat
  1732. var.idx (into tracing_map_elt.vars[]): 1
  1733. var.hist_data: 0000000055d65ed0
  1734. var_ref_idx (into hist_data->var_refs[]): 2
  1735. type: u64
  1736. size: 0
  1737. is_signed: 0
  1738. hist_data->var_refs[3]:
  1739. flags:
  1740. HIST_FIELD_FL_VAR_REF
  1741. name: woken_pid
  1742. var.idx (into tracing_map_elt.vars[]): 0
  1743. var.hist_data: 0000000055d65ed0
  1744. var_ref_idx (into hist_data->var_refs[]): 3
  1745. type: pid_t
  1746. size: 4
  1747. is_signed: 1
  1748. hist_data->var_refs[4]:
  1749. flags:
  1750. HIST_FIELD_FL_VAR_REF
  1751. name: next_comm
  1752. var.idx (into tracing_map_elt.vars[]): 2
  1753. var.hist_data: 0000000055d65ed0
  1754. var_ref_idx (into hist_data->var_refs[]): 4
  1755. type: char[16]
  1756. size: 256
  1757. is_signed: 0
  1758. field variables:
  1759. hist_data->field_vars[0]:
  1760. field_vars[0].var:
  1761. flags:
  1762. HIST_FIELD_FL_VAR
  1763. var.name: next_comm
  1764. var.idx (into tracing_map_elt.vars[]): 2
  1765. field_vars[0].val:
  1766. ftrace_event_field name: next_comm
  1767. type: char[16]
  1768. size: 256
  1769. is_signed: 0
  1770. action tracking variables (for onmax()/onchange()/onmatch()):
  1771. hist_data->actions[0].match_data.event_system: sched
  1772. hist_data->actions[0].match_data.event: sched_waking
  1773. The commands below can be used to clean things up for the next test::
  1774. # echo '!hist:keys=next_pid:woken_pid=$waking_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,$woken_pid,next_comm)' >> events/sched/sched_switch/trigger
  1775. # echo '!hist:keys=pid:ts0=common_timestamp.usecs' >> events/sched/sched_waking/trigger
  1776. # echo '!wakeup_latency u64 lat; pid_t pid; char comm[16]' >> synthetic_events