xdr_parse.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #!/usr/bin/env python3
  2. # ex: set filetype=python:
  3. """Common parsing code for xdrgen"""
  4. import sys
  5. from typing import Callable
  6. from lark import Lark
  7. from lark.exceptions import UnexpectedInput, UnexpectedToken, VisitError
  8. # Set to True to emit annotation comments in generated source
  9. annotate = False
  10. # Set to True to emit enum value validation in decoders
  11. enum_validation = True
  12. # Map internal Lark token names to human-readable names
  13. TOKEN_NAMES = {
  14. "__ANON_0": "identifier",
  15. "__ANON_1": "number",
  16. "SEMICOLON": "';'",
  17. "LBRACE": "'{'",
  18. "RBRACE": "'}'",
  19. "LPAR": "'('",
  20. "RPAR": "')'",
  21. "LSQB": "'['",
  22. "RSQB": "']'",
  23. "LESSTHAN": "'<'",
  24. "MORETHAN": "'>'",
  25. "EQUAL": "'='",
  26. "COLON": "':'",
  27. "COMMA": "','",
  28. "STAR": "'*'",
  29. "$END": "end of file",
  30. }
  31. class XdrParseError(Exception):
  32. """Raised when XDR parsing fails"""
  33. def set_xdr_annotate(set_it: bool) -> None:
  34. """Set 'annotate' if --annotate was specified on the command line"""
  35. global annotate
  36. annotate = set_it
  37. def get_xdr_annotate() -> bool:
  38. """Return True if --annotate was specified on the command line"""
  39. return annotate
  40. def set_xdr_enum_validation(set_it: bool) -> None:
  41. """Set 'enum_validation' based on command line options"""
  42. global enum_validation
  43. enum_validation = set_it
  44. def get_xdr_enum_validation() -> bool:
  45. """Return True when enum validation is enabled for decoder generation"""
  46. return enum_validation
  47. def make_error_handler(source: str, filename: str) -> Callable[[UnexpectedInput], bool]:
  48. """Create an error handler that reports the first parse error and aborts.
  49. Args:
  50. source: The XDR source text being parsed
  51. filename: The name of the file being parsed
  52. Returns:
  53. An error handler function for use with Lark's on_error parameter
  54. """
  55. lines = source.splitlines()
  56. def handle_parse_error(e: UnexpectedInput) -> bool:
  57. """Report a parse error with context and abort parsing"""
  58. line_num = e.line
  59. column = e.column
  60. line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""
  61. # Build the error message
  62. msg_parts = [f"{filename}:{line_num}:{column}: parse error"]
  63. # Show what was found vs what was expected
  64. if isinstance(e, UnexpectedToken):
  65. token = e.token
  66. if token.type == "__ANON_0":
  67. found = f"identifier '{token.value}'"
  68. elif token.type == "__ANON_1":
  69. found = f"number '{token.value}'"
  70. else:
  71. found = f"'{token.value}'"
  72. msg_parts.append(f"Unexpected {found}")
  73. # Provide helpful expected tokens list
  74. expected = e.expected
  75. if expected:
  76. readable = [
  77. TOKEN_NAMES.get(exp, exp.lower().replace("_", " "))
  78. for exp in sorted(expected)
  79. ]
  80. if len(readable) == 1:
  81. msg_parts.append(f"Expected {readable[0]}")
  82. elif len(readable) <= 4:
  83. msg_parts.append(f"Expected one of: {', '.join(readable)}")
  84. else:
  85. msg_parts.append(str(e).split("\n")[0])
  86. # Show the offending line with a caret pointing to the error
  87. msg_parts.append("")
  88. msg_parts.append(f" {line_text}")
  89. prefix = line_text[: column - 1].expandtabs()
  90. msg_parts.append(f" {' ' * len(prefix)}^")
  91. sys.stderr.write("\n".join(msg_parts) + "\n")
  92. raise XdrParseError()
  93. return handle_parse_error
  94. def handle_transform_error(e: VisitError, source: str, filename: str) -> None:
  95. """Report a transform error with context.
  96. Args:
  97. e: The VisitError from Lark's transformer
  98. source: The XDR source text being parsed
  99. filename: The name of the file being parsed
  100. """
  101. lines = source.splitlines()
  102. # Extract position from the tree node if available
  103. line_num = 0
  104. column = 0
  105. if hasattr(e.obj, "meta") and e.obj.meta:
  106. line_num = e.obj.meta.line
  107. column = e.obj.meta.column
  108. line_text = lines[line_num - 1] if 0 < line_num <= len(lines) else ""
  109. # Build the error message
  110. msg_parts = [f"{filename}:{line_num}:{column}: semantic error"]
  111. # The original exception is typically a KeyError for undefined types
  112. if isinstance(e.orig_exc, KeyError):
  113. msg_parts.append(f"Undefined type '{e.orig_exc.args[0]}'")
  114. else:
  115. msg_parts.append(str(e.orig_exc))
  116. # Show the offending line with a caret pointing to the error
  117. if line_text:
  118. msg_parts.append("")
  119. msg_parts.append(f" {line_text}")
  120. prefix = line_text[: column - 1].expandtabs()
  121. msg_parts.append(f" {' ' * len(prefix)}^")
  122. sys.stderr.write("\n".join(msg_parts) + "\n")
  123. def xdr_parser() -> Lark:
  124. """Return a Lark parser instance configured with the XDR language grammar"""
  125. return Lark.open(
  126. "grammars/xdr.lark",
  127. rel_to=__file__,
  128. start="specification",
  129. debug=True,
  130. strict=True,
  131. propagate_positions=True,
  132. parser="lalr",
  133. lexer="contextual",
  134. )