discouraged.rs 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. // SPDX-License-Identifier: Apache-2.0 OR MIT
  2. //! Extensions to the parsing API with niche applicability.
  3. use crate::buffer::Cursor;
  4. use crate::error::Result;
  5. use crate::parse::{inner_unexpected, ParseBuffer, Unexpected};
  6. use proc_macro2::extra::DelimSpan;
  7. use proc_macro2::Delimiter;
  8. use std::cell::Cell;
  9. use std::mem;
  10. use std::rc::Rc;
  /// Extensions to the `ParseStream` API to support speculative parsing.
  pub trait Speculative {
      /// Advance this parse stream to the position of a forked parse stream.
      ///
      /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
      /// parse stream, perform some speculative parsing, then join the original
      /// stream to the fork to "commit" the parsing from the fork to the main
      /// stream.
      ///
      /// If you can avoid doing this, you should, as it limits the ability to
      /// generate useful errors. That said, it is often the only way to parse
      /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
      /// is that when the fork fails to parse an `A`, it's impossible to tell
      /// whether that was because of a syntax error and the user meant to provide
      /// an `A`, or that the `A`s are finished and it's time to start parsing
      /// `B`s. Use with care.
      ///
      /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
      /// parsing `B*` and removing the leading members of `A` from the
      /// repetition, bypassing the need to involve the downsides associated with
      /// speculative parsing.
      ///
      /// [`ParseStream::fork`]: ParseBuffer::fork
      ///
      /// # Example
      ///
      /// There has been chatter about the possibility of making the colons in the
      /// turbofish syntax like `path::to::<T>` no longer required by accepting
      /// `path::to<T>` in expression position. Specifically, according to [RFC
      /// 2544], [`PathSegment`] parsing should always try to consume a following
      /// `<` token as the start of generic arguments, and reset to the `<` if
      /// that fails (e.g. the token is acting as a less-than operator).
      ///
      /// This is the exact kind of parsing behavior which requires the "fork,
      /// try, commit" behavior that [`ParseStream::fork`] discourages. With
      /// `advance_to`, we can avoid having to parse the speculatively parsed
      /// content a second time.
      ///
      /// This change in behavior can be implemented in syn by replacing just the
      /// `Parse` implementation for `PathSegment`:
      ///
      /// ```
      /// # use syn::ext::IdentExt;
      /// use syn::parse::discouraged::Speculative;
      /// # use syn::parse::{Parse, ParseStream};
      /// # use syn::{Ident, PathArguments, Result, Token};
      ///
      /// pub struct PathSegment {
      ///     pub ident: Ident,
      ///     pub arguments: PathArguments,
      /// }
      /// #
      /// # impl<T> From<T> for PathSegment
      /// # where
      /// #     T: Into<Ident>,
      /// # {
      /// #     fn from(ident: T) -> Self {
      /// #         PathSegment {
      /// #             ident: ident.into(),
      /// #             arguments: PathArguments::None,
      /// #         }
      /// #     }
      /// # }
      ///
      /// impl Parse for PathSegment {
      ///     fn parse(input: ParseStream) -> Result<Self> {
      ///         if input.peek(Token![super])
      ///             || input.peek(Token![self])
      ///             || input.peek(Token![Self])
      ///             || input.peek(Token![crate])
      ///         {
      ///             let ident = input.call(Ident::parse_any)?;
      ///             return Ok(PathSegment::from(ident));
      ///         }
      ///
      ///         let ident = input.parse()?;
      ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
      ///             return Ok(PathSegment {
      ///                 ident,
      ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
      ///             });
      ///         }
      ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
      ///             let fork = input.fork();
      ///             if let Ok(arguments) = fork.parse() {
      ///                 input.advance_to(&fork);
      ///                 return Ok(PathSegment {
      ///                     ident,
      ///                     arguments: PathArguments::AngleBracketed(arguments),
      ///                 });
      ///             }
      ///         }
      ///         Ok(PathSegment::from(ident))
      ///     }
      /// }
      ///
      /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
      /// ```
      ///
      /// # Drawbacks
      ///
      /// The main drawback of this style of speculative parsing is in error
      /// presentation. Even if the lookahead is the "correct" parse, the error
      /// that is shown is that of the "fallback" parse. To use the same example
      /// as the turbofish above, take the following unfinished "turbofish":
      ///
      /// ```text
      /// let _ = f<&'a fn(), for<'a> serde::>();
      /// ```
      ///
      /// If this is parsed as generic arguments, we can provide the error message
      ///
      /// ```text
      /// error: expected identifier
      ///  --> src.rs:L:C
      ///   |
      /// L | let _ = f<&'a fn(), for<'a> serde::>();
      ///   |                                    ^
      /// ```
      ///
      /// but if parsed using the above speculative parsing, it falls back to
      /// assuming that the `<` is a less-than when it fails to parse the generic
      /// arguments, and tries to interpret the `&'a` as the start of a labelled
      /// loop, resulting in the much less helpful error
      ///
      /// ```text
      /// error: expected `:`
      ///  --> src.rs:L:C
      ///   |
      /// L | let _ = f<&'a fn(), for<'a> serde::>();
      ///   |               ^^
      /// ```
      ///
      /// This can be mitigated with various heuristics (two examples: show both
      /// forks' parse errors, or show the one that consumed more tokens), but
      /// when you can control the grammar, sticking to something that can be
      /// parsed LL(3) and without the LL(*) speculative parsing this makes
      /// possible, displaying reasonable errors becomes much simpler.
      ///
      /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
      /// [`PathSegment`]: crate::PathSegment
      ///
      /// # Performance
      ///
      /// This method performs a cheap fixed amount of work that does not depend
      /// on how far apart the two streams are positioned.
      ///
      /// # Panics
      ///
      /// The forked stream in the argument of `advance_to` must have been
      /// obtained by forking `self`. Attempting to advance to any other stream
      /// will cause a panic.
      fn advance_to(&self, fork: &Self);
  }
  165. impl<'a> Speculative for ParseBuffer<'a> {
  166. fn advance_to(&self, fork: &Self) {
  167. if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
  168. panic!("fork was not derived from the advancing parse stream");
  169. }
  170. let (self_unexp, self_sp) = inner_unexpected(self);
  171. let (fork_unexp, fork_sp) = inner_unexpected(fork);
  172. if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
  173. match (fork_sp, self_sp) {
  174. // Unexpected set on the fork, but not on `self`, copy it over.
  175. (Some((span, delimiter)), None) => {
  176. self_unexp.set(Unexpected::Some(span, delimiter));
  177. }
  178. // Unexpected unset. Use chain to propagate errors from fork.
  179. (None, None) => {
  180. fork_unexp.set(Unexpected::Chain(self_unexp));
  181. // Ensure toplevel 'unexpected' tokens from the fork don't
  182. // propagate up the chain by replacing the root `unexpected`
  183. // pointer, only 'unexpected' tokens from existing group
  184. // parsers should propagate.
  185. fork.unexpected
  186. .set(Some(Rc::new(Cell::new(Unexpected::None))));
  187. }
  188. // Unexpected has been set on `self`. No changes needed.
  189. (_, Some(_)) => {}
  190. }
  191. }
  192. // See comment on `cell` in the struct definition.
  193. self.cell
  194. .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
  195. }
  196. }
  /// Extensions to the `ParseStream` API to support manipulating invisible
  /// delimiters the same as if they were visible.
  pub trait AnyDelimiter {
      /// Returns the delimiter, the span of the delimiter token, and the nested
      /// contents for further parsing.
      ///
      /// The three values are returned as a tuple in that order; the nested
      /// contents are handed back as a `ParseBuffer` of their own.
      ///
      /// # Errors
      ///
      /// The implementation for `ParseBuffer` in this module returns the error
      /// "expected any delimiter" when `any_group` on the current cursor
      /// yields nothing.
      fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;
  }
  204. impl<'a> AnyDelimiter for ParseBuffer<'a> {
  205. fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {
  206. self.step(|cursor| {
  207. if let Some((content, delimiter, span, rest)) = cursor.any_group() {
  208. let scope = span.close();
  209. let nested = crate::parse::advance_step_cursor(cursor, content);
  210. let unexpected = crate::parse::get_unexpected(self);
  211. let content = crate::parse::new_parse_buffer(scope, nested, unexpected);
  212. Ok(((delimiter, span, content), rest))
  213. } else {
  214. Err(cursor.error("expected any delimiter"))
  215. }
  216. })
  217. }
  218. }