parse.rs 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997
  1. // SPDX-License-Identifier: Apache-2.0 OR MIT
  2. use crate::fallback::{
  3. self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
  4. TokenStreamBuilder,
  5. };
  6. use crate::{Delimiter, Punct, Spacing, TokenTree};
  7. use core::char;
  8. use core::str::{Bytes, CharIndices, Chars};
  /// A lightweight, copyable view of the input that has not been lexed yet.
  #[derive(Clone, Copy, PartialEq, Eq)]
  pub(crate) struct Cursor<'a> {
      /// The unconsumed remainder of the source text.
      pub(crate) rest: &'a str,
      /// Position of `rest`; `advance` increases it by the number of `char`s
      /// it consumes. Only present when built with `span_locations`.
      #[cfg(span_locations)]
      pub(crate) off: u32,
  }
  15. impl<'a> Cursor<'a> {
  16. pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
  17. let (_front, rest) = self.rest.split_at(bytes);
  18. Cursor {
  19. rest,
  20. #[cfg(span_locations)]
  21. off: self.off + _front.chars().count() as u32,
  22. }
  23. }
  24. pub(crate) fn starts_with(&self, s: &str) -> bool {
  25. self.rest.starts_with(s)
  26. }
  27. pub(crate) fn starts_with_char(&self, ch: char) -> bool {
  28. self.rest.starts_with(ch)
  29. }
  30. pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
  31. where
  32. Pattern: FnMut(char) -> bool,
  33. {
  34. self.rest.starts_with(f)
  35. }
  36. pub(crate) fn is_empty(&self) -> bool {
  37. self.rest.is_empty()
  38. }
  39. fn len(&self) -> usize {
  40. self.rest.len()
  41. }
  42. fn as_bytes(&self) -> &'a [u8] {
  43. self.rest.as_bytes()
  44. }
  45. fn bytes(&self) -> Bytes<'a> {
  46. self.rest.bytes()
  47. }
  48. fn chars(&self) -> Chars<'a> {
  49. self.rest.chars()
  50. }
  51. fn char_indices(&self) -> CharIndices<'a> {
  52. self.rest.char_indices()
  53. }
  54. fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
  55. if self.starts_with(tag) {
  56. Ok(self.advance(tag.len()))
  57. } else {
  58. Err(Reject)
  59. }
  60. }
  61. }
  /// Unit error value the parsers in this file return when the input at the
  /// cursor does not match what they expect.
  pub(crate) struct Reject;
  63. type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
  64. fn skip_whitespace(input: Cursor) -> Cursor {
  65. let mut s = input;
  66. while !s.is_empty() {
  67. let byte = s.as_bytes()[0];
  68. if byte == b'/' {
  69. if s.starts_with("//")
  70. && (!s.starts_with("///") || s.starts_with("////"))
  71. && !s.starts_with("//!")
  72. {
  73. let (cursor, _) = take_until_newline_or_eof(s);
  74. s = cursor;
  75. continue;
  76. } else if s.starts_with("/**/") {
  77. s = s.advance(4);
  78. continue;
  79. } else if s.starts_with("/*")
  80. && (!s.starts_with("/**") || s.starts_with("/***"))
  81. && !s.starts_with("/*!")
  82. {
  83. match block_comment(s) {
  84. Ok((rest, _)) => {
  85. s = rest;
  86. continue;
  87. }
  88. Err(Reject) => return s,
  89. }
  90. }
  91. }
  92. match byte {
  93. b' ' | 0x09..=0x0d => {
  94. s = s.advance(1);
  95. continue;
  96. }
  97. b if b.is_ascii() => {}
  98. _ => {
  99. let ch = s.chars().next().unwrap();
  100. if is_whitespace(ch) {
  101. s = s.advance(ch.len_utf8());
  102. continue;
  103. }
  104. }
  105. }
  106. return s;
  107. }
  108. s
  109. }
  110. fn block_comment(input: Cursor) -> PResult<&str> {
  111. if !input.starts_with("/*") {
  112. return Err(Reject);
  113. }
  114. let mut depth = 0usize;
  115. let bytes = input.as_bytes();
  116. let mut i = 0usize;
  117. let upper = bytes.len() - 1;
  118. while i < upper {
  119. if bytes[i] == b'/' && bytes[i + 1] == b'*' {
  120. depth += 1;
  121. i += 1; // eat '*'
  122. } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
  123. depth -= 1;
  124. if depth == 0 {
  125. return Ok((input.advance(i + 2), &input.rest[..i + 2]));
  126. }
  127. i += 1; // eat '/'
  128. }
  129. i += 1;
  130. }
  131. Err(Reject)
  132. }
  /// Whether `ch` counts as whitespace for the lexer.
  ///
  /// Besides Unicode whitespace this includes the left-to-right mark (U+200E)
  /// and right-to-left mark (U+200F), which Rust treats as whitespace.
  fn is_whitespace(ch: char) -> bool {
      matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
  }
  137. fn word_break(input: Cursor) -> Result<Cursor, Reject> {
  138. match input.chars().next() {
  139. Some(ch) if is_ident_continue(ch) => Err(Reject),
  140. Some(_) | None => Ok(input),
  141. }
  142. }
  // How rustc prints a macro expansion error that occurs in expression or type
  // position. The lexer below special-cases this exact text.
  const ERROR: &str = "(/*ERROR*/)";
  146. pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
  147. let mut trees = TokenStreamBuilder::new();
  148. let mut stack = Vec::new();
  149. loop {
  150. input = skip_whitespace(input);
  151. if let Ok((rest, ())) = doc_comment(input, &mut trees) {
  152. input = rest;
  153. continue;
  154. }
  155. #[cfg(span_locations)]
  156. let lo = input.off;
  157. let first = match input.bytes().next() {
  158. Some(first) => first,
  159. None => match stack.last() {
  160. None => return Ok(trees.build()),
  161. #[cfg(span_locations)]
  162. Some((lo, _frame)) => {
  163. return Err(LexError {
  164. span: Span { lo: *lo, hi: *lo },
  165. })
  166. }
  167. #[cfg(not(span_locations))]
  168. Some(_frame) => return Err(LexError { span: Span {} }),
  169. },
  170. };
  171. if let Some(open_delimiter) = match first {
  172. b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
  173. b'[' => Some(Delimiter::Bracket),
  174. b'{' => Some(Delimiter::Brace),
  175. _ => None,
  176. } {
  177. input = input.advance(1);
  178. let frame = (open_delimiter, trees);
  179. #[cfg(span_locations)]
  180. let frame = (lo, frame);
  181. stack.push(frame);
  182. trees = TokenStreamBuilder::new();
  183. } else if let Some(close_delimiter) = match first {
  184. b')' => Some(Delimiter::Parenthesis),
  185. b']' => Some(Delimiter::Bracket),
  186. b'}' => Some(Delimiter::Brace),
  187. _ => None,
  188. } {
  189. let frame = match stack.pop() {
  190. Some(frame) => frame,
  191. None => return Err(lex_error(input)),
  192. };
  193. #[cfg(span_locations)]
  194. let (lo, frame) = frame;
  195. let (open_delimiter, outer) = frame;
  196. if open_delimiter != close_delimiter {
  197. return Err(lex_error(input));
  198. }
  199. input = input.advance(1);
  200. let mut g = Group::new(open_delimiter, trees.build());
  201. g.set_span(Span {
  202. #[cfg(span_locations)]
  203. lo,
  204. #[cfg(span_locations)]
  205. hi: input.off,
  206. });
  207. trees = outer;
  208. trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
  209. } else {
  210. let (rest, mut tt) = match leaf_token(input) {
  211. Ok((rest, tt)) => (rest, tt),
  212. Err(Reject) => return Err(lex_error(input)),
  213. };
  214. tt.set_span(crate::Span::_new_fallback(Span {
  215. #[cfg(span_locations)]
  216. lo,
  217. #[cfg(span_locations)]
  218. hi: rest.off,
  219. }));
  220. trees.push_token_from_parser(tt);
  221. input = rest;
  222. }
  223. }
  224. }
  225. fn lex_error(cursor: Cursor) -> LexError {
  226. #[cfg(not(span_locations))]
  227. let _ = cursor;
  228. LexError {
  229. span: Span {
  230. #[cfg(span_locations)]
  231. lo: cursor.off,
  232. #[cfg(span_locations)]
  233. hi: cursor.off,
  234. },
  235. }
  236. }
  237. fn leaf_token(input: Cursor) -> PResult<TokenTree> {
  238. if let Ok((input, l)) = literal(input) {
  239. // must be parsed before ident
  240. Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
  241. } else if let Ok((input, p)) = punct(input) {
  242. Ok((input, TokenTree::Punct(p)))
  243. } else if let Ok((input, i)) = ident(input) {
  244. Ok((input, TokenTree::Ident(i)))
  245. } else if input.starts_with(ERROR) {
  246. let rest = input.advance(ERROR.len());
  247. let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
  248. Ok((rest, TokenTree::Literal(repr)))
  249. } else {
  250. Err(Reject)
  251. }
  252. }
  253. fn ident(input: Cursor) -> PResult<crate::Ident> {
  254. if [
  255. "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
  256. ]
  257. .iter()
  258. .any(|prefix| input.starts_with(prefix))
  259. {
  260. Err(Reject)
  261. } else {
  262. ident_any(input)
  263. }
  264. }
  265. fn ident_any(input: Cursor) -> PResult<crate::Ident> {
  266. let raw = input.starts_with("r#");
  267. let rest = input.advance((raw as usize) << 1);
  268. let (rest, sym) = ident_not_raw(rest)?;
  269. if !raw {
  270. let ident =
  271. crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
  272. return Ok((rest, ident));
  273. }
  274. match sym {
  275. "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
  276. _ => {}
  277. }
  278. let ident =
  279. crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
  280. Ok((rest, ident))
  281. }
  282. fn ident_not_raw(input: Cursor) -> PResult<&str> {
  283. let mut chars = input.char_indices();
  284. match chars.next() {
  285. Some((_, ch)) if is_ident_start(ch) => {}
  286. _ => return Err(Reject),
  287. }
  288. let mut end = input.len();
  289. for (i, ch) in chars {
  290. if !is_ident_continue(ch) {
  291. end = i;
  292. break;
  293. }
  294. }
  295. Ok((input.advance(end), &input.rest[..end]))
  296. }
  297. pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
  298. let rest = literal_nocapture(input)?;
  299. let end = input.len() - rest.len();
  300. Ok((rest, Literal::_new(input.rest[..end].to_string())))
  301. }
  302. fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
  303. if let Ok(ok) = string(input) {
  304. Ok(ok)
  305. } else if let Ok(ok) = byte_string(input) {
  306. Ok(ok)
  307. } else if let Ok(ok) = c_string(input) {
  308. Ok(ok)
  309. } else if let Ok(ok) = byte(input) {
  310. Ok(ok)
  311. } else if let Ok(ok) = character(input) {
  312. Ok(ok)
  313. } else if let Ok(ok) = float(input) {
  314. Ok(ok)
  315. } else if let Ok(ok) = int(input) {
  316. Ok(ok)
  317. } else {
  318. Err(Reject)
  319. }
  320. }
  321. fn literal_suffix(input: Cursor) -> Cursor {
  322. match ident_not_raw(input) {
  323. Ok((input, _)) => input,
  324. Err(Reject) => input,
  325. }
  326. }
  327. fn string(input: Cursor) -> Result<Cursor, Reject> {
  328. if let Ok(input) = input.parse("\"") {
  329. cooked_string(input)
  330. } else if let Ok(input) = input.parse("r") {
  331. raw_string(input)
  332. } else {
  333. Err(Reject)
  334. }
  335. }
  336. fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
  337. let mut chars = input.char_indices();
  338. while let Some((i, ch)) = chars.next() {
  339. match ch {
  340. '"' => {
  341. let input = input.advance(i + 1);
  342. return Ok(literal_suffix(input));
  343. }
  344. '\r' => match chars.next() {
  345. Some((_, '\n')) => {}
  346. _ => break,
  347. },
  348. '\\' => match chars.next() {
  349. Some((_, 'x')) => {
  350. backslash_x_char(&mut chars)?;
  351. }
  352. Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
  353. Some((_, 'u')) => {
  354. backslash_u(&mut chars)?;
  355. }
  356. Some((newline, ch @ ('\n' | '\r'))) => {
  357. input = input.advance(newline + 1);
  358. trailing_backslash(&mut input, ch as u8)?;
  359. chars = input.char_indices();
  360. }
  361. _ => break,
  362. },
  363. _ch => {}
  364. }
  365. }
  366. Err(Reject)
  367. }
  368. fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
  369. let (input, delimiter) = delimiter_of_raw_string(input)?;
  370. let mut bytes = input.bytes().enumerate();
  371. while let Some((i, byte)) = bytes.next() {
  372. match byte {
  373. b'"' if input.rest[i + 1..].starts_with(delimiter) => {
  374. let rest = input.advance(i + 1 + delimiter.len());
  375. return Ok(literal_suffix(rest));
  376. }
  377. b'\r' => match bytes.next() {
  378. Some((_, b'\n')) => {}
  379. _ => break,
  380. },
  381. _ => {}
  382. }
  383. }
  384. Err(Reject)
  385. }
  386. fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
  387. if let Ok(input) = input.parse("b\"") {
  388. cooked_byte_string(input)
  389. } else if let Ok(input) = input.parse("br") {
  390. raw_byte_string(input)
  391. } else {
  392. Err(Reject)
  393. }
  394. }
  395. fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
  396. let mut bytes = input.bytes().enumerate();
  397. while let Some((offset, b)) = bytes.next() {
  398. match b {
  399. b'"' => {
  400. let input = input.advance(offset + 1);
  401. return Ok(literal_suffix(input));
  402. }
  403. b'\r' => match bytes.next() {
  404. Some((_, b'\n')) => {}
  405. _ => break,
  406. },
  407. b'\\' => match bytes.next() {
  408. Some((_, b'x')) => {
  409. backslash_x_byte(&mut bytes)?;
  410. }
  411. Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
  412. Some((newline, b @ (b'\n' | b'\r'))) => {
  413. input = input.advance(newline + 1);
  414. trailing_backslash(&mut input, b)?;
  415. bytes = input.bytes().enumerate();
  416. }
  417. _ => break,
  418. },
  419. b if b.is_ascii() => {}
  420. _ => break,
  421. }
  422. }
  423. Err(Reject)
  424. }
  425. fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
  426. for (i, byte) in input.bytes().enumerate() {
  427. match byte {
  428. b'"' => {
  429. if i > 255 {
  430. // https://github.com/rust-lang/rust/pull/95251
  431. return Err(Reject);
  432. }
  433. return Ok((input.advance(i + 1), &input.rest[..i]));
  434. }
  435. b'#' => {}
  436. _ => break,
  437. }
  438. }
  439. Err(Reject)
  440. }
  441. fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
  442. let (input, delimiter) = delimiter_of_raw_string(input)?;
  443. let mut bytes = input.bytes().enumerate();
  444. while let Some((i, byte)) = bytes.next() {
  445. match byte {
  446. b'"' if input.rest[i + 1..].starts_with(delimiter) => {
  447. let rest = input.advance(i + 1 + delimiter.len());
  448. return Ok(literal_suffix(rest));
  449. }
  450. b'\r' => match bytes.next() {
  451. Some((_, b'\n')) => {}
  452. _ => break,
  453. },
  454. other => {
  455. if !other.is_ascii() {
  456. break;
  457. }
  458. }
  459. }
  460. }
  461. Err(Reject)
  462. }
  463. fn c_string(input: Cursor) -> Result<Cursor, Reject> {
  464. if let Ok(input) = input.parse("c\"") {
  465. cooked_c_string(input)
  466. } else if let Ok(input) = input.parse("cr") {
  467. raw_c_string(input)
  468. } else {
  469. Err(Reject)
  470. }
  471. }
  472. fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
  473. let (input, delimiter) = delimiter_of_raw_string(input)?;
  474. let mut bytes = input.bytes().enumerate();
  475. while let Some((i, byte)) = bytes.next() {
  476. match byte {
  477. b'"' if input.rest[i + 1..].starts_with(delimiter) => {
  478. let rest = input.advance(i + 1 + delimiter.len());
  479. return Ok(literal_suffix(rest));
  480. }
  481. b'\r' => match bytes.next() {
  482. Some((_, b'\n')) => {}
  483. _ => break,
  484. },
  485. b'\0' => break,
  486. _ => {}
  487. }
  488. }
  489. Err(Reject)
  490. }
  491. fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
  492. let mut chars = input.char_indices();
  493. while let Some((i, ch)) = chars.next() {
  494. match ch {
  495. '"' => {
  496. let input = input.advance(i + 1);
  497. return Ok(literal_suffix(input));
  498. }
  499. '\r' => match chars.next() {
  500. Some((_, '\n')) => {}
  501. _ => break,
  502. },
  503. '\\' => match chars.next() {
  504. Some((_, 'x')) => {
  505. backslash_x_nonzero(&mut chars)?;
  506. }
  507. Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
  508. Some((_, 'u')) => {
  509. if backslash_u(&mut chars)? == '\0' {
  510. break;
  511. }
  512. }
  513. Some((newline, ch @ ('\n' | '\r'))) => {
  514. input = input.advance(newline + 1);
  515. trailing_backslash(&mut input, ch as u8)?;
  516. chars = input.char_indices();
  517. }
  518. _ => break,
  519. },
  520. '\0' => break,
  521. _ch => {}
  522. }
  523. }
  524. Err(Reject)
  525. }
  526. fn byte(input: Cursor) -> Result<Cursor, Reject> {
  527. let input = input.parse("b'")?;
  528. let mut bytes = input.bytes().enumerate();
  529. let ok = match bytes.next().map(|(_, b)| b) {
  530. Some(b'\\') => match bytes.next().map(|(_, b)| b) {
  531. Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
  532. Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
  533. _ => false,
  534. },
  535. b => b.is_some(),
  536. };
  537. if !ok {
  538. return Err(Reject);
  539. }
  540. let (offset, _) = bytes.next().ok_or(Reject)?;
  541. if !input.chars().as_str().is_char_boundary(offset) {
  542. return Err(Reject);
  543. }
  544. let input = input.advance(offset).parse("'")?;
  545. Ok(literal_suffix(input))
  546. }
  547. fn character(input: Cursor) -> Result<Cursor, Reject> {
  548. let input = input.parse("'")?;
  549. let mut chars = input.char_indices();
  550. let ok = match chars.next().map(|(_, ch)| ch) {
  551. Some('\\') => match chars.next().map(|(_, ch)| ch) {
  552. Some('x') => backslash_x_char(&mut chars).is_ok(),
  553. Some('u') => backslash_u(&mut chars).is_ok(),
  554. Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
  555. _ => false,
  556. },
  557. ch => ch.is_some(),
  558. };
  559. if !ok {
  560. return Err(Reject);
  561. }
  562. let (idx, _) = chars.next().ok_or(Reject)?;
  563. let input = input.advance(idx).parse("'")?;
  564. Ok(literal_suffix(input))
  565. }
  /// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
  /// item if it matches `$pat`; otherwise returns `Err(Reject)` from the
  /// enclosing function (also when the iterator is exhausted).
  macro_rules! next_ch {
      ($chars:ident @ $pat:pat) => {
          match $chars.next() {
              None => return Err(Reject),
              Some((_, ch)) => {
                  if !matches!(ch, $pat) {
                      return Err(Reject);
                  }
                  ch
              }
          }
      };
  }
  577. fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
  578. where
  579. I: Iterator<Item = (usize, char)>,
  580. {
  581. next_ch!(chars @ '0'..='7');
  582. next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
  583. Ok(())
  584. }
  585. fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
  586. where
  587. I: Iterator<Item = (usize, u8)>,
  588. {
  589. next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
  590. next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
  591. Ok(())
  592. }
  593. fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
  594. where
  595. I: Iterator<Item = (usize, char)>,
  596. {
  597. let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
  598. let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
  599. if first == '0' && second == '0' {
  600. Err(Reject)
  601. } else {
  602. Ok(())
  603. }
  604. }
  605. fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
  606. where
  607. I: Iterator<Item = (usize, char)>,
  608. {
  609. next_ch!(chars @ '{');
  610. let mut value = 0;
  611. let mut len = 0;
  612. for (_, ch) in chars {
  613. let digit = match ch {
  614. '0'..='9' => ch as u8 - b'0',
  615. 'a'..='f' => 10 + ch as u8 - b'a',
  616. 'A'..='F' => 10 + ch as u8 - b'A',
  617. '_' if len > 0 => continue,
  618. '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
  619. _ => break,
  620. };
  621. if len == 6 {
  622. break;
  623. }
  624. value *= 0x10;
  625. value += u32::from(digit);
  626. len += 1;
  627. }
  628. Err(Reject)
  629. }
  630. fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
  631. let mut whitespace = input.bytes().enumerate();
  632. loop {
  633. if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
  634. return Err(Reject);
  635. }
  636. match whitespace.next() {
  637. Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
  638. last = b;
  639. }
  640. Some((offset, _)) => {
  641. *input = input.advance(offset);
  642. return Ok(());
  643. }
  644. None => return Err(Reject),
  645. }
  646. }
  647. }
  648. fn float(input: Cursor) -> Result<Cursor, Reject> {
  649. let mut rest = float_digits(input)?;
  650. if let Some(ch) = rest.chars().next() {
  651. if is_ident_start(ch) {
  652. rest = ident_not_raw(rest)?.0;
  653. }
  654. }
  655. word_break(rest)
  656. }
  657. fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
  658. let mut chars = input.chars().peekable();
  659. match chars.next() {
  660. Some(ch) if '0' <= ch && ch <= '9' => {}
  661. _ => return Err(Reject),
  662. }
  663. let mut len = 1;
  664. let mut has_dot = false;
  665. let mut has_exp = false;
  666. while let Some(&ch) = chars.peek() {
  667. match ch {
  668. '0'..='9' | '_' => {
  669. chars.next();
  670. len += 1;
  671. }
  672. '.' => {
  673. if has_dot {
  674. break;
  675. }
  676. chars.next();
  677. if chars
  678. .peek()
  679. .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
  680. {
  681. return Err(Reject);
  682. }
  683. len += 1;
  684. has_dot = true;
  685. }
  686. 'e' | 'E' => {
  687. chars.next();
  688. len += 1;
  689. has_exp = true;
  690. break;
  691. }
  692. _ => break,
  693. }
  694. }
  695. if !(has_dot || has_exp) {
  696. return Err(Reject);
  697. }
  698. if has_exp {
  699. let token_before_exp = if has_dot {
  700. Ok(input.advance(len - 1))
  701. } else {
  702. Err(Reject)
  703. };
  704. let mut has_sign = false;
  705. let mut has_exp_value = false;
  706. while let Some(&ch) = chars.peek() {
  707. match ch {
  708. '+' | '-' => {
  709. if has_exp_value {
  710. break;
  711. }
  712. if has_sign {
  713. return token_before_exp;
  714. }
  715. chars.next();
  716. len += 1;
  717. has_sign = true;
  718. }
  719. '0'..='9' => {
  720. chars.next();
  721. len += 1;
  722. has_exp_value = true;
  723. }
  724. '_' => {
  725. chars.next();
  726. len += 1;
  727. }
  728. _ => break,
  729. }
  730. }
  731. if !has_exp_value {
  732. return token_before_exp;
  733. }
  734. }
  735. Ok(input.advance(len))
  736. }
  737. fn int(input: Cursor) -> Result<Cursor, Reject> {
  738. let mut rest = digits(input)?;
  739. if let Some(ch) = rest.chars().next() {
  740. if is_ident_start(ch) {
  741. rest = ident_not_raw(rest)?.0;
  742. }
  743. }
  744. word_break(rest)
  745. }
  746. fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
  747. let base = if input.starts_with("0x") {
  748. input = input.advance(2);
  749. 16
  750. } else if input.starts_with("0o") {
  751. input = input.advance(2);
  752. 8
  753. } else if input.starts_with("0b") {
  754. input = input.advance(2);
  755. 2
  756. } else {
  757. 10
  758. };
  759. let mut len = 0;
  760. let mut empty = true;
  761. for b in input.bytes() {
  762. match b {
  763. b'0'..=b'9' => {
  764. let digit = (b - b'0') as u64;
  765. if digit >= base {
  766. return Err(Reject);
  767. }
  768. }
  769. b'a'..=b'f' => {
  770. let digit = 10 + (b - b'a') as u64;
  771. if digit >= base {
  772. break;
  773. }
  774. }
  775. b'A'..=b'F' => {
  776. let digit = 10 + (b - b'A') as u64;
  777. if digit >= base {
  778. break;
  779. }
  780. }
  781. b'_' => {
  782. if empty && base == 10 {
  783. return Err(Reject);
  784. }
  785. len += 1;
  786. continue;
  787. }
  788. _ => break,
  789. }
  790. len += 1;
  791. empty = false;
  792. }
  793. if empty {
  794. Err(Reject)
  795. } else {
  796. Ok(input.advance(len))
  797. }
  798. }
  799. fn punct(input: Cursor) -> PResult<Punct> {
  800. let (rest, ch) = punct_char(input)?;
  801. if ch == '\'' {
  802. let (after_lifetime, _ident) = ident_any(rest)?;
  803. if after_lifetime.starts_with_char('\'')
  804. || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
  805. {
  806. Err(Reject)
  807. } else {
  808. Ok((rest, Punct::new('\'', Spacing::Joint)))
  809. }
  810. } else {
  811. let kind = match punct_char(rest) {
  812. Ok(_) => Spacing::Joint,
  813. Err(Reject) => Spacing::Alone,
  814. };
  815. Ok((rest, Punct::new(ch, kind)))
  816. }
  817. }
  818. fn punct_char(input: Cursor) -> PResult<char> {
  819. if input.starts_with("//") || input.starts_with("/*") {
  820. // Do not accept `/` of a comment as a punct.
  821. return Err(Reject);
  822. }
  823. let mut chars = input.chars();
  824. let first = match chars.next() {
  825. Some(ch) => ch,
  826. None => {
  827. return Err(Reject);
  828. }
  829. };
  830. let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
  831. if recognized.contains(first) {
  832. Ok((input.advance(first.len_utf8()), first))
  833. } else {
  834. Err(Reject)
  835. }
  836. }
  837. fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
  838. #[cfg(span_locations)]
  839. let lo = input.off;
  840. let (rest, (comment, inner)) = doc_comment_contents(input)?;
  841. let fallback_span = Span {
  842. #[cfg(span_locations)]
  843. lo,
  844. #[cfg(span_locations)]
  845. hi: rest.off,
  846. };
  847. let span = crate::Span::_new_fallback(fallback_span);
  848. let mut scan_for_bare_cr = comment;
  849. while let Some(cr) = scan_for_bare_cr.find('\r') {
  850. let rest = &scan_for_bare_cr[cr + 1..];
  851. if !rest.starts_with('\n') {
  852. return Err(Reject);
  853. }
  854. scan_for_bare_cr = rest;
  855. }
  856. let mut pound = Punct::new('#', Spacing::Alone);
  857. pound.set_span(span);
  858. trees.push_token_from_parser(TokenTree::Punct(pound));
  859. if inner {
  860. let mut bang = Punct::new('!', Spacing::Alone);
  861. bang.set_span(span);
  862. trees.push_token_from_parser(TokenTree::Punct(bang));
  863. }
  864. let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
  865. let mut equal = Punct::new('=', Spacing::Alone);
  866. equal.set_span(span);
  867. let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
  868. literal.set_span(span);
  869. let mut bracketed = TokenStreamBuilder::with_capacity(3);
  870. bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
  871. bracketed.push_token_from_parser(TokenTree::Punct(equal));
  872. bracketed.push_token_from_parser(TokenTree::Literal(literal));
  873. let group = Group::new(Delimiter::Bracket, bracketed.build());
  874. let mut group = crate::Group::_new_fallback(group);
  875. group.set_span(span);
  876. trees.push_token_from_parser(TokenTree::Group(group));
  877. Ok((rest, ()))
  878. }
  879. fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
  880. if input.starts_with("//!") {
  881. let input = input.advance(3);
  882. let (input, s) = take_until_newline_or_eof(input);
  883. Ok((input, (s, true)))
  884. } else if input.starts_with("/*!") {
  885. let (input, s) = block_comment(input)?;
  886. Ok((input, (&s[3..s.len() - 2], true)))
  887. } else if input.starts_with("///") {
  888. let input = input.advance(3);
  889. if input.starts_with_char('/') {
  890. return Err(Reject);
  891. }
  892. let (input, s) = take_until_newline_or_eof(input);
  893. Ok((input, (s, false)))
  894. } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
  895. let (input, s) = block_comment(input)?;
  896. Ok((input, (&s[3..s.len() - 2], false)))
  897. } else {
  898. Err(Reject)
  899. }
  900. }
  901. fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
  902. let chars = input.char_indices();
  903. for (i, ch) in chars {
  904. if ch == '\n' {
  905. return (input.advance(i), &input.rest[..i]);
  906. } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
  907. return (input.advance(i + 1), &input.rest[..i]);
  908. }
  909. }
  910. (input.advance(input.len()), input.rest)
  911. }