buffer.rs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. // SPDX-License-Identifier: Apache-2.0 OR MIT
  2. //! A stably addressed token buffer supporting efficient traversal based on a
  3. //! cheaply copyable cursor.
  4. // This module is heavily commented as it contains most of the unsafe code in
  5. // Syn, and caution should be used when editing it. The public-facing interface
  6. // is 100% safe but the implementation is fragile internally.
  7. use crate::Lifetime;
  8. use proc_macro2::extra::DelimSpan;
  9. use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
  10. use std::cmp::Ordering;
  11. use std::marker::PhantomData;
  12. use std::ptr;
  13. /// Internal type which is used instead of `TokenTree` to represent a token tree
  14. /// within a `TokenBuffer`.
  15. enum Entry {
  16. // Mimicking types from proc-macro.
  17. // Group entries contain the offset to the matching End entry.
  18. Group(Group, usize),
  19. Ident(Ident),
  20. Punct(Punct),
  21. Literal(Literal),
  22. // End entries contain the offset (negative) to the start of the buffer, and
  23. // offset (negative) to the matching Group entry.
  24. End(isize, isize),
  25. }
  26. /// A buffer that can be efficiently traversed multiple times, unlike
  27. /// `TokenStream` which requires a deep copy in order to traverse more than
  28. /// once.
  29. pub struct TokenBuffer {
  30. // NOTE: Do not implement clone on this - while the current design could be
  31. // cloned, other designs which could be desirable may not be cloneable.
  32. entries: Box<[Entry]>,
  33. }
  34. impl TokenBuffer {
  35. fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
  36. for tt in stream {
  37. match tt {
  38. TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
  39. TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
  40. TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
  41. TokenTree::Group(group) => {
  42. let group_start_index = entries.len();
  43. entries.push(Entry::End(0, 0)); // we replace this below
  44. Self::recursive_new(entries, group.stream());
  45. let group_end_index = entries.len();
  46. let group_offset = group_end_index - group_start_index;
  47. entries.push(Entry::End(
  48. -(group_end_index as isize),
  49. -(group_offset as isize),
  50. ));
  51. entries[group_start_index] = Entry::Group(group, group_offset);
  52. }
  53. }
  54. }
  55. }
  56. /// Creates a `TokenBuffer` containing all the tokens from the input
  57. /// `proc_macro::TokenStream`.
  58. #[cfg(feature = "proc-macro")]
  59. #[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
  60. pub fn new(stream: proc_macro::TokenStream) -> Self {
  61. Self::new2(stream.into())
  62. }
  63. /// Creates a `TokenBuffer` containing all the tokens from the input
  64. /// `proc_macro2::TokenStream`.
  65. pub fn new2(stream: TokenStream) -> Self {
  66. let mut entries = Vec::new();
  67. Self::recursive_new(&mut entries, stream);
  68. entries.push(Entry::End(-(entries.len() as isize), 0));
  69. Self {
  70. entries: entries.into_boxed_slice(),
  71. }
  72. }
  73. /// Creates a cursor referencing the first token in the buffer and able to
  74. /// traverse until the end of the buffer.
  75. pub fn begin(&self) -> Cursor {
  76. let ptr = self.entries.as_ptr();
  77. unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
  78. }
  79. }
  80. /// A cheaply copyable cursor into a `TokenBuffer`.
  81. ///
  82. /// This cursor holds a shared reference into the immutable data which is used
  83. /// internally to represent a `TokenStream`, and can be efficiently manipulated
  84. /// and copied around.
  85. ///
  86. /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
  87. /// object and get a cursor to its first token with `begin()`.
  88. pub struct Cursor<'a> {
  89. // The current entry which the `Cursor` is pointing at.
  90. ptr: *const Entry,
  91. // This is the only `Entry::End` object which this cursor is allowed to
  92. // point at. All other `End` objects are skipped over in `Cursor::create`.
  93. scope: *const Entry,
  94. // Cursor is covariant in 'a. This field ensures that our pointers are still
  95. // valid.
  96. marker: PhantomData<&'a Entry>,
  97. }
  98. impl<'a> Cursor<'a> {
  99. /// Creates a cursor referencing a static empty TokenStream.
  100. pub fn empty() -> Self {
  101. // It's safe in this situation for us to put an `Entry` object in global
  102. // storage, despite it not actually being safe to send across threads
  103. // (`Ident` is a reference into a thread-local table). This is because
  104. // this entry never includes a `Ident` object.
  105. //
  106. // This wrapper struct allows us to break the rules and put a `Sync`
  107. // object in global storage.
  108. struct UnsafeSyncEntry(Entry);
  109. unsafe impl Sync for UnsafeSyncEntry {}
  110. static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
  111. Cursor {
  112. ptr: &EMPTY_ENTRY.0,
  113. scope: &EMPTY_ENTRY.0,
  114. marker: PhantomData,
  115. }
  116. }
  117. /// This create method intelligently exits non-explicitly-entered
  118. /// `None`-delimited scopes when the cursor reaches the end of them,
  119. /// allowing for them to be treated transparently.
  120. unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
  121. // NOTE: If we're looking at a `End`, we want to advance the cursor
  122. // past it, unless `ptr == scope`, which means that we're at the edge of
  123. // our cursor's scope. We should only have `ptr != scope` at the exit
  124. // from None-delimited groups entered with `ignore_none`.
  125. while let Entry::End(..) = unsafe { &*ptr } {
  126. if ptr::eq(ptr, scope) {
  127. break;
  128. }
  129. ptr = unsafe { ptr.add(1) };
  130. }
  131. Cursor {
  132. ptr,
  133. scope,
  134. marker: PhantomData,
  135. }
  136. }
  137. /// Get the current entry.
  138. fn entry(self) -> &'a Entry {
  139. unsafe { &*self.ptr }
  140. }
  141. /// Bump the cursor to point at the next token after the current one. This
  142. /// is undefined behavior if the cursor is currently looking at an
  143. /// `Entry::End`.
  144. ///
  145. /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
  146. /// point at the first token in the group (with the same scope end).
  147. unsafe fn bump_ignore_group(self) -> Cursor<'a> {
  148. unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
  149. }
  150. /// While the cursor is looking at a `None`-delimited group, move it to look
  151. /// at the first token inside instead. If the group is empty, this will move
  152. /// the cursor past the `None`-delimited group.
  153. ///
  154. /// WARNING: This mutates its argument.
  155. fn ignore_none(&mut self) {
  156. while let Entry::Group(group, _) = self.entry() {
  157. if group.delimiter() == Delimiter::None {
  158. unsafe { *self = self.bump_ignore_group() };
  159. } else {
  160. break;
  161. }
  162. }
  163. }
  164. /// Checks whether the cursor is currently pointing at the end of its valid
  165. /// scope.
  166. pub fn eof(self) -> bool {
  167. // We're at eof if we're at the end of our scope.
  168. ptr::eq(self.ptr, self.scope)
  169. }
  170. /// If the cursor is pointing at a `Ident`, returns it along with a cursor
  171. /// pointing at the next `TokenTree`.
  172. pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
  173. self.ignore_none();
  174. match self.entry() {
  175. Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
  176. _ => None,
  177. }
  178. }
  179. /// If the cursor is pointing at a `Punct`, returns it along with a cursor
  180. /// pointing at the next `TokenTree`.
  181. pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
  182. self.ignore_none();
  183. match self.entry() {
  184. Entry::Punct(punct) if punct.as_char() != '\'' => {
  185. Some((punct.clone(), unsafe { self.bump_ignore_group() }))
  186. }
  187. _ => None,
  188. }
  189. }
  190. /// If the cursor is pointing at a `Literal`, return it along with a cursor
  191. /// pointing at the next `TokenTree`.
  192. pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
  193. self.ignore_none();
  194. match self.entry() {
  195. Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
  196. _ => None,
  197. }
  198. }
  199. /// If the cursor is pointing at a `Lifetime`, returns it along with a
  200. /// cursor pointing at the next `TokenTree`.
  201. pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
  202. self.ignore_none();
  203. match self.entry() {
  204. Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
  205. let next = unsafe { self.bump_ignore_group() };
  206. let (ident, rest) = next.ident()?;
  207. let lifetime = Lifetime {
  208. apostrophe: punct.span(),
  209. ident,
  210. };
  211. Some((lifetime, rest))
  212. }
  213. _ => None,
  214. }
  215. }
  216. /// If the cursor is pointing at a `Group` with the given delimiter, returns
  217. /// a cursor into that group and one pointing to the next `TokenTree`.
  218. pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
  219. // If we're not trying to enter a none-delimited group, we want to
  220. // ignore them. We have to make sure to _not_ ignore them when we want
  221. // to enter them, of course. For obvious reasons.
  222. if delim != Delimiter::None {
  223. self.ignore_none();
  224. }
  225. if let Entry::Group(group, end_offset) = self.entry() {
  226. if group.delimiter() == delim {
  227. let span = group.delim_span();
  228. let end_of_group = unsafe { self.ptr.add(*end_offset) };
  229. let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
  230. let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
  231. return Some((inside_of_group, span, after_group));
  232. }
  233. }
  234. None
  235. }
  236. /// If the cursor is pointing at a `Group`, returns a cursor into the group
  237. /// and one pointing to the next `TokenTree`.
  238. pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
  239. if let Entry::Group(group, end_offset) = self.entry() {
  240. let delimiter = group.delimiter();
  241. let span = group.delim_span();
  242. let end_of_group = unsafe { self.ptr.add(*end_offset) };
  243. let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
  244. let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
  245. return Some((inside_of_group, delimiter, span, after_group));
  246. }
  247. None
  248. }
  249. pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
  250. if let Entry::Group(group, end_offset) = self.entry() {
  251. let end_of_group = unsafe { self.ptr.add(*end_offset) };
  252. let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
  253. return Some((group.clone(), after_group));
  254. }
  255. None
  256. }
  257. /// Copies all remaining tokens visible from this cursor into a
  258. /// `TokenStream`.
  259. pub fn token_stream(self) -> TokenStream {
  260. let mut tts = Vec::new();
  261. let mut cursor = self;
  262. while let Some((tt, rest)) = cursor.token_tree() {
  263. tts.push(tt);
  264. cursor = rest;
  265. }
  266. tts.into_iter().collect()
  267. }
  268. /// If the cursor is pointing at a `TokenTree`, returns it along with a
  269. /// cursor pointing at the next `TokenTree`.
  270. ///
  271. /// Returns `None` if the cursor has reached the end of its stream.
  272. ///
  273. /// This method does not treat `None`-delimited groups as transparent, and
  274. /// will return a `Group(None, ..)` if the cursor is looking at one.
  275. pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
  276. let (tree, len) = match self.entry() {
  277. Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
  278. Entry::Literal(literal) => (literal.clone().into(), 1),
  279. Entry::Ident(ident) => (ident.clone().into(), 1),
  280. Entry::Punct(punct) => (punct.clone().into(), 1),
  281. Entry::End(..) => return None,
  282. };
  283. let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
  284. Some((tree, rest))
  285. }
  286. /// Returns the `Span` of the current token, or `Span::call_site()` if this
  287. /// cursor points to eof.
  288. pub fn span(mut self) -> Span {
  289. match self.entry() {
  290. Entry::Group(group, _) => group.span(),
  291. Entry::Literal(literal) => literal.span(),
  292. Entry::Ident(ident) => ident.span(),
  293. Entry::Punct(punct) => punct.span(),
  294. Entry::End(_, offset) => {
  295. self.ptr = unsafe { self.ptr.offset(*offset) };
  296. if let Entry::Group(group, _) = self.entry() {
  297. group.span_close()
  298. } else {
  299. Span::call_site()
  300. }
  301. }
  302. }
  303. }
  304. /// Returns the `Span` of the token immediately prior to the position of
  305. /// this cursor, or of the current token if there is no previous one.
  306. #[cfg(any(feature = "full", feature = "derive"))]
  307. pub(crate) fn prev_span(mut self) -> Span {
  308. if start_of_buffer(self) < self.ptr {
  309. self.ptr = unsafe { self.ptr.offset(-1) };
  310. }
  311. self.span()
  312. }
  313. /// Skip over the next token that is not a None-delimited group, without
  314. /// cloning it. Returns `None` if this cursor points to eof.
  315. ///
  316. /// This method treats `'lifetimes` as a single token.
  317. pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
  318. self.ignore_none();
  319. let len = match self.entry() {
  320. Entry::End(..) => return None,
  321. // Treat lifetimes as a single tt for the purposes of 'skip'.
  322. Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
  323. match unsafe { &*self.ptr.add(1) } {
  324. Entry::Ident(_) => 2,
  325. _ => 1,
  326. }
  327. }
  328. Entry::Group(_, end_offset) => *end_offset,
  329. _ => 1,
  330. };
  331. Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
  332. }
  333. pub(crate) fn scope_delimiter(self) -> Delimiter {
  334. match unsafe { &*self.scope } {
  335. Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
  336. Entry::Group(group, _) => group.delimiter(),
  337. _ => Delimiter::None,
  338. },
  339. _ => unreachable!(),
  340. }
  341. }
  342. }
  343. impl<'a> Copy for Cursor<'a> {}
  344. impl<'a> Clone for Cursor<'a> {
  345. fn clone(&self) -> Self {
  346. *self
  347. }
  348. }
  349. impl<'a> Eq for Cursor<'a> {}
  350. impl<'a> PartialEq for Cursor<'a> {
  351. fn eq(&self, other: &Self) -> bool {
  352. ptr::eq(self.ptr, other.ptr)
  353. }
  354. }
  355. impl<'a> PartialOrd for Cursor<'a> {
  356. fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
  357. if same_buffer(*self, *other) {
  358. Some(cmp_assuming_same_buffer(*self, *other))
  359. } else {
  360. None
  361. }
  362. }
  363. }
  364. pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
  365. ptr::eq(a.scope, b.scope)
  366. }
  367. pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
  368. ptr::eq(start_of_buffer(a), start_of_buffer(b))
  369. }
  370. fn start_of_buffer(cursor: Cursor) -> *const Entry {
  371. unsafe {
  372. match &*cursor.scope {
  373. Entry::End(offset, _) => cursor.scope.offset(*offset),
  374. _ => unreachable!(),
  375. }
  376. }
  377. }
  378. pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
  379. a.ptr.cmp(&b.ptr)
  380. }
  381. pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
  382. match cursor.entry() {
  383. Entry::Group(group, _) => group.span_open(),
  384. _ => cursor.span(),
  385. }
  386. }