kernel_include.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: GPL-2.0
  3. # pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
  4. """
  5. Implementation of the ``kernel-include`` reST-directive.
  6. :copyright: Copyright (C) 2016 Markus Heiser
  7. :license: GPL Version 2, June 1991 see linux/COPYING for details.
  8. The ``kernel-include`` reST-directive is a replacement for the ``include``
  9. directive. The ``kernel-include`` directive expands environment variables in
  10. the path name and allows including files from arbitrary locations.
  11. .. hint::
  12. Including files from arbitrary locations (e.g. from ``/etc``) is a
  13. security risk for builders. This is why the ``include`` directive from
  14. docutils *prohibits* pathnames pointing to locations *above* the filesystem
  15. tree where the reST document with the include directive is placed.
  16. Substrings of the form $name or ${name} are replaced by the value of
  17. environment variable name. Malformed variable names and references to
  18. non-existing variables are left unchanged.
  19. **Supported Sphinx Include Options**:
  20. :param literal:
  21. If present, the included file is inserted as a literal block.
  22. :param code:
  23. Specify the language for syntax highlighting (e.g., 'c', 'python').
  24. :param encoding:
  25. Specify the encoding of the included file (default: 'utf-8').
  26. :param tab-width:
  27. Specify the number of spaces that a tab represents.
  28. :param start-line:
  29. Line number at which to start including the file (1-based).
  30. :param end-line:
  31. Line number at which to stop including the file (inclusive).
  32. :param start-after:
  33. Include lines after the first line matching this text.
  34. :param end-before:
  35. Include lines before the first line matching this text.
  36. :param number-lines:
  37. Number the included lines (integer specifies start number).
  38. Only effective with 'literal' or 'code' options.
  39. :param class:
  40. Specify HTML class attribute for the included content.
  41. **Kernel-specific Extensions**:
  42. :param generate-cross-refs:
  43. If present, instead of directly including the file, it calls
  44. ParseDataStructs() to convert C data structures into cross-references
  45. that link to comprehensive documentation in other ReST files.
  46. :param exception-file:
  47. (Used with generate-cross-refs)
  48. Path to a file containing rules for handling special cases:
  49. - Ignore specific C data structures
  50. - Use alternative reference names
  51. - Specify different reference types
  52. :param warn-broken:
  53. (Used with generate-cross-refs)
  54. Enables warnings when auto-generated cross-references don't point to
  55. existing documentation targets.
  56. """
  57. # ==============================================================================
  58. # imports
  59. # ==============================================================================
  60. import os.path
  61. import re
  62. import sys
  63. from difflib import get_close_matches
  64. from docutils import io, nodes, statemachine
  65. from docutils.statemachine import ViewList
  66. from docutils.parsers.rst import Directive, directives
  67. from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
  68. from sphinx.util import logging
# Absolute path of the kernel source tree, read from the ``srctree``
# environment variable. Importing this module raises KeyError if it is unset.
srctree = os.path.abspath(os.environ["srctree"])

# Put the in-tree Python library directory first on the module search path
# so that the ``kdoc`` import below can be resolved.
sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))

from kdoc.parse_data_structs import ParseDataStructs

# Version reported to Sphinx by setup()
__version__ = "1.0"

logger = logging.getLogger(__name__)

# Matches a role reference surrounded by backslashes, i.e.
# "\ :ref:`text<target>`\" (also c:type and c:func); the "<target>" part
# is optional.
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')

# Matches any text enclosed in single backticks
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')

# Matches list items in the form "- LINENO_<number>: <text>", capturing the
# number and the text. Used by KernelInclude.xref_text() for TOC output.
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')

# Splits a string at its last dot (the first group is greedy)
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
  78. def ErrorString(exc): # Shamelessly stolen from docutils
  79. return f'{exc.__class__.__name}: {exc}'
  80. # ==============================================================================
  81. class KernelInclude(Directive):
  82. """
  83. KernelInclude (``kernel-include``) directive
  84. Most of the stuff here came from Include directive defined at:
  85. docutils/parsers/rst/directives/misc.py
  86. Yet, overriding the class don't has any benefits: the original class
  87. only have run() and argument list. Not all of them are implemented,
  88. when checked against latest Sphinx version, as with time more arguments
  89. were added.
  90. So, keep its own list of supported arguments
  91. """
  92. required_arguments = 1
  93. optional_arguments = 0
  94. final_argument_whitespace = True
  95. option_spec = {
  96. 'literal': directives.flag,
  97. 'code': directives.unchanged,
  98. 'encoding': directives.encoding,
  99. 'tab-width': int,
  100. 'start-line': int,
  101. 'end-line': int,
  102. 'start-after': directives.unchanged_required,
  103. 'end-before': directives.unchanged_required,
  104. # ignored except for 'literal' or 'code':
  105. 'number-lines': directives.unchanged, # integer or None
  106. 'class': directives.class_option,
  107. # Arguments that aren't from Sphinx Include directive
  108. 'generate-cross-refs': directives.flag,
  109. 'warn-broken': directives.flag,
  110. 'toc': directives.flag,
  111. 'exception-file': directives.unchanged,
  112. }
  113. def read_rawtext(self, path, encoding):
  114. """Read and process file content with error handling"""
  115. try:
  116. self.state.document.settings.record_dependencies.add(path)
  117. include_file = io.FileInput(source_path=path,
  118. encoding=encoding,
  119. error_handler=self.state.document.settings.input_encoding_error_handler)
  120. except UnicodeEncodeError:
  121. raise self.severe('Problems with directive path:\n'
  122. 'Cannot encode input file path "%s" '
  123. '(wrong locale?).' % path)
  124. except IOError as error:
  125. raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))
  126. try:
  127. return include_file.read()
  128. except UnicodeError as error:
  129. raise self.severe('Problem with directive:\n%s' % ErrorString(error))
  130. def apply_range(self, rawtext):
  131. """
  132. Handles start-line, end-line, start-after and end-before parameters
  133. """
  134. # Get to-be-included content
  135. startline = self.options.get('start-line', None)
  136. endline = self.options.get('end-line', None)
  137. try:
  138. if startline or (endline is not None):
  139. lines = rawtext.splitlines()
  140. rawtext = '\n'.join(lines[startline:endline])
  141. except UnicodeError as error:
  142. raise self.severe(f'Problem with "{self.name}" directive:\n'
  143. + io.error_string(error))
  144. # start-after/end-before: no restrictions on newlines in match-text,
  145. # and no restrictions on matching inside lines vs. line boundaries
  146. after_text = self.options.get("start-after", None)
  147. if after_text:
  148. # skip content in rawtext before *and incl.* a matching text
  149. after_index = rawtext.find(after_text)
  150. if after_index < 0:
  151. raise self.severe('Problem with "start-after" option of "%s" '
  152. "directive:\nText not found." % self.name)
  153. rawtext = rawtext[after_index + len(after_text) :]
  154. before_text = self.options.get("end-before", None)
  155. if before_text:
  156. # skip content in rawtext after *and incl.* a matching text
  157. before_index = rawtext.find(before_text)
  158. if before_index < 0:
  159. raise self.severe('Problem with "end-before" option of "%s" '
  160. "directive:\nText not found." % self.name)
  161. rawtext = rawtext[:before_index]
  162. return rawtext
  163. def xref_text(self, env, path, tab_width):
  164. """
  165. Read and add contents from a C file parsed to have cross references.
  166. There are two types of supported output here:
  167. - A C source code with cross-references;
  168. - a TOC table containing cross references.
  169. """
  170. parser = ParseDataStructs()
  171. if 'exception-file' in self.options:
  172. source_dir = os.path.dirname(os.path.abspath(
  173. self.state_machine.input_lines.source(
  174. self.lineno - self.state_machine.input_offset - 1)))
  175. exceptions_file = os.path.join(source_dir, self.options['exception-file'])
  176. else:
  177. exceptions_file = None
  178. parser.parse_file(path, exceptions_file)
  179. # Store references on a symbol dict to be used at check time
  180. if 'warn-broken' in self.options:
  181. env._xref_files.add(path)
  182. if "toc" not in self.options:
  183. rawtext = ".. parsed-literal::\n\n" + parser.gen_output()
  184. self.apply_range(rawtext)
  185. include_lines = statemachine.string2lines(rawtext, tab_width,
  186. convert_whitespace=True)
  187. # Sphinx always blame the ".. <directive>", so placing
  188. # line numbers here won't make any difference
  189. self.state_machine.insert_input(include_lines, path)
  190. return []
  191. # TOC output is a ReST file, not a literal. So, we can add line
  192. # numbers
  193. startline = self.options.get('start-line', None)
  194. endline = self.options.get('end-line', None)
  195. relpath = os.path.relpath(path, srctree)
  196. result = ViewList()
  197. for line in parser.gen_toc().split("\n"):
  198. match = RE_LINENO_REF.match(line)
  199. if not match:
  200. result.append(line, path)
  201. continue
  202. ln, ref = match.groups()
  203. ln = int(ln)
  204. # Filter line range if needed
  205. if startline and (ln < startline):
  206. continue
  207. if endline and (ln > endline):
  208. continue
  209. # Sphinx numerates starting with zero, but text editors
  210. # and other tools start from one
  211. realln = ln + 1
  212. result.append(f"- {ref}: {relpath}#{realln}", path, ln)
  213. self.state_machine.insert_input(result, path)
  214. return []
  215. def literal(self, path, tab_width, rawtext):
  216. """Output a literal block"""
  217. # Convert tabs to spaces, if `tab_width` is positive.
  218. if tab_width >= 0:
  219. text = rawtext.expandtabs(tab_width)
  220. else:
  221. text = rawtext
  222. literal_block = nodes.literal_block(rawtext, source=path,
  223. classes=self.options.get("class", []))
  224. literal_block.line = 1
  225. self.add_name(literal_block)
  226. if "number-lines" in self.options:
  227. try:
  228. startline = int(self.options["number-lines"] or 1)
  229. except ValueError:
  230. raise self.error(":number-lines: with non-integer start value")
  231. endline = startline + len(include_lines)
  232. if text.endswith("\n"):
  233. text = text[:-1]
  234. tokens = NumberLines([([], text)], startline, endline)
  235. for classes, value in tokens:
  236. if classes:
  237. literal_block += nodes.inline(value, value,
  238. classes=classes)
  239. else:
  240. literal_block += nodes.Text(value, value)
  241. else:
  242. literal_block += nodes.Text(text, text)
  243. return [literal_block]
  244. def code(self, path, tab_width):
  245. """Output a code block"""
  246. include_lines = statemachine.string2lines(rawtext, tab_width,
  247. convert_whitespace=True)
  248. self.options["source"] = path
  249. codeblock = CodeBlock(self.name,
  250. [self.options.pop("code")], # arguments
  251. self.options,
  252. include_lines,
  253. self.lineno,
  254. self.content_offset,
  255. self.block_text,
  256. self.state,
  257. self.state_machine)
  258. return codeblock.run()
  259. def run(self):
  260. """Include a file as part of the content of this reST file."""
  261. env = self.state.document.settings.env
  262. #
  263. # The include logic accepts only patches relative to the
  264. # Kernel source tree. The logic does check it to prevent
  265. # directory traverse issues.
  266. #
  267. srctree = os.path.abspath(os.environ["srctree"])
  268. path = os.path.expandvars(self.arguments[0])
  269. src_path = os.path.join(srctree, path)
  270. if os.path.isfile(src_path):
  271. base = srctree
  272. path = src_path
  273. else:
  274. raise self.warning(f'File "%s" doesn\'t exist', path)
  275. abs_base = os.path.abspath(base)
  276. abs_full_path = os.path.abspath(os.path.join(base, path))
  277. try:
  278. if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
  279. raise self.severe('Problems with "%s" directive, prohibited path: %s' %
  280. (self.name, path))
  281. except ValueError:
  282. # Paths don't have the same drive (Windows) or other incompatibility
  283. raise self.severe('Problems with "%s" directive, invalid path: %s' %
  284. (self.name, path))
  285. self.arguments[0] = path
  286. #
  287. # Add path location to Sphinx dependencies to ensure proper cache
  288. # invalidation check.
  289. #
  290. env.note_dependency(os.path.abspath(path))
  291. if not self.state.document.settings.file_insertion_enabled:
  292. raise self.warning('"%s" directive disabled.' % self.name)
  293. source = self.state_machine.input_lines.source(self.lineno -
  294. self.state_machine.input_offset - 1)
  295. source_dir = os.path.dirname(os.path.abspath(source))
  296. path = directives.path(self.arguments[0])
  297. if path.startswith("<") and path.endswith(">"):
  298. path = os.path.join(self.standard_include_path, path[1:-1])
  299. path = os.path.normpath(os.path.join(source_dir, path))
  300. # HINT: this is the only line I had to change / commented out:
  301. # path = utils.relative_path(None, path)
  302. encoding = self.options.get("encoding",
  303. self.state.document.settings.input_encoding)
  304. tab_width = self.options.get("tab-width",
  305. self.state.document.settings.tab_width)
  306. # Get optional arguments to related to cross-references generation
  307. if "generate-cross-refs" in self.options:
  308. return self.xref_text(env, path, tab_width)
  309. rawtext = self.read_rawtext(path, encoding)
  310. rawtext = self.apply_range(rawtext)
  311. if "code" in self.options:
  312. return self.code(path, tab_width, rawtext)
  313. return self.literal(path, tab_width, rawtext)
  314. # ==============================================================================
# (source, message) pairs already warned by check_missing_refs(), used to
# avoid emitting the same warning twice
reported = set()

# Domain name -> list of object type names. Filled once by
# fill_domain_info()
DOMAIN_INFO = {}

# Lower-case reference name -> list of suggestion lines in the form
# "\t<domain>:<objtype>:`<name>` (from <docname>)". Filled by
# fill_domain_info() and read by get_suggestions()
all_refs = {}
  318. def fill_domain_info(env):
  319. """
  320. Get supported reference types for each Sphinx domain and C namespaces
  321. """
  322. if DOMAIN_INFO:
  323. return
  324. for domain_name, domain_instance in env.domains.items():
  325. try:
  326. object_types = list(domain_instance.object_types.keys())
  327. DOMAIN_INFO[domain_name] = object_types
  328. except AttributeError:
  329. # Ignore domains that we can't retrieve object types, if any
  330. pass
  331. for domain in DOMAIN_INFO.keys():
  332. domain_obj = env.get_domain(domain)
  333. for name, dispname, objtype, docname, anchor, priority in domain_obj.get_objects():
  334. ref_name = name.lower()
  335. if domain == "c":
  336. if '.' in ref_name:
  337. ref_name = ref_name.split(".")[-1]
  338. if not ref_name in all_refs:
  339. all_refs[ref_name] = []
  340. all_refs[ref_name].append(f"\t{domain}:{objtype}:`{name}` (from {docname})")
  341. def get_suggestions(app, env, node,
  342. original_target, original_domain, original_reftype):
  343. """Check if target exists in the other domain or with different reftypes."""
  344. original_target = original_target.lower()
  345. # Remove namespace if present
  346. if original_domain == "c":
  347. if '.' in original_target:
  348. original_target = original_target.split(".")[-1]
  349. suggestions = []
  350. # If name exists, propose exact name match on different domains
  351. if original_target in all_refs:
  352. return all_refs[original_target]
  353. # If not found, get a close match, using difflib.
  354. # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
  355. # for a close match within a certain distance. We're using the defaults
  356. # here, e.g. cutoff=0.6, proposing 3 alternatives
  357. matches = get_close_matches(original_target, all_refs.keys())
  358. for match in matches:
  359. suggestions += all_refs[match]
  360. return suggestions
  361. def check_missing_refs(app, env, node, contnode):
  362. """Check broken refs for the files it creates xrefs"""
  363. if not node.source:
  364. return None
  365. try:
  366. xref_files = env._xref_files
  367. except AttributeError:
  368. logger.critical("FATAL: _xref_files not initialized!")
  369. raise
  370. # Only show missing references for kernel-include reference-parsed files
  371. if node.source not in xref_files:
  372. return None
  373. fill_domain_info(env)
  374. target = node.get('reftarget', '')
  375. domain = node.get('refdomain', 'std')
  376. reftype = node.get('reftype', '')
  377. msg = f"Invalid xref: {domain}:{reftype}:`{target}`"
  378. # Don't duplicate warnings
  379. data = (node.source, msg)
  380. if data in reported:
  381. return None
  382. reported.add(data)
  383. suggestions = get_suggestions(app, env, node, target, domain, reftype)
  384. if suggestions:
  385. msg += ". Possible alternatives:\n" + '\n'.join(suggestions)
  386. logger.warning(msg, location=node, type='ref', subtype='missing')
  387. return None
  388. def merge_xref_info(app, env, docnames, other):
  389. """
  390. As each process modify env._xref_files, we need to merge them back.
  391. """
  392. if not hasattr(other, "_xref_files"):
  393. return
  394. env._xref_files.update(getattr(other, "_xref_files", set()))
  395. def init_xref_docs(app, env, docnames):
  396. """Initialize a list of files that we're generating cross references¨"""
  397. app.env._xref_files = set()
  398. # ==============================================================================
  399. def setup(app):
  400. """Setup Sphinx exension"""
  401. app.connect("env-before-read-docs", init_xref_docs)
  402. app.connect("env-merge-info", merge_xref_info)
  403. app.add_directive("kernel-include", KernelInclude)
  404. app.connect("missing-reference", check_missing_refs)
  405. return {
  406. "version": __version__,
  407. "parallel_read_safe": True,
  408. "parallel_write_safe": True,
  409. }