rustc_ast/tokenstream.rs
//! # Token Streams
//!
//! `TokenStream`s represent syntactic objects before they are converted into ASTs.
//! A `TokenStream` is, roughly speaking, a sequence of [`TokenTree`]s,
//! which are themselves a single [`Token`] or a `Delimited` subsequence of tokens.
//!
//! ## Ownership
//!
//! `TokenStream`s are persistent data structures constructed as ropes with
//! reference-counted children. In general, this means that calling an operation
//! on a `TokenStream` (such as `slice`) produces an entirely new `TokenStream`
//! from the borrowed reference to the original. This essentially coerces
//! `TokenStream`s into "views" of their subparts, and a borrowed `TokenStream`
//! is sufficient to build an owned `TokenStream` without taking ownership of
//! the original.
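//!
//! For example (an illustrative sketch; `ts` stands for any existing stream):
//! cloning a stream only bumps a reference count, and a new owned stream can
//! be assembled from borrowed token trees:
//!
//! ```ignore (illustrative)
//! let view = ts.clone(); // cheap: clones the `Arc`, not the token trees
//! let owned: TokenStream = ts.iter().cloned().collect();
//! ```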

use std::borrow::Cow;
use std::ops::Range;
use std::sync::Arc;
use std::{cmp, fmt, iter, mem};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync;
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
use rustc_serialize::{Decodable, Encodable};
use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym};
use thin_vec::ThinVec;

use crate::ast::AttrStyle;
use crate::ast_traits::{HasAttrs, HasTokens};
use crate::token::{self, Delimiter, Token, TokenKind};
use crate::{AttrVec, Attribute};

/// Part of a `TokenStream`.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum TokenTree {
    /// A single token. Should never be `OpenDelim` or `CloseDelim`, because
    /// delimiters are implicitly represented by `Delimited`.
    Token(Token, Spacing),
    /// A delimited sequence of token trees.
    Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream),
}

// Ensure all fields of `TokenTree` are `DynSend` and `DynSync`.
fn _dummy()
where
    Token: sync::DynSend + sync::DynSync,
    Spacing: sync::DynSend + sync::DynSync,
    DelimSpan: sync::DynSend + sync::DynSync,
    Delimiter: sync::DynSend + sync::DynSync,
    TokenStream: sync::DynSend + sync::DynSync,
{
}

impl TokenTree {
    /// Checks if this `TokenTree` is equal to the other, regardless of span/spacing information.
    pub fn eq_unspanned(&self, other: &TokenTree) -> bool {
        match (self, other) {
            (TokenTree::Token(token, _), TokenTree::Token(token2, _)) => token.kind == token2.kind,
            (TokenTree::Delimited(.., delim, tts), TokenTree::Delimited(.., delim2, tts2)) => {
                delim == delim2 && tts.eq_unspanned(tts2)
            }
            _ => false,
        }
    }

    /// Retrieves the `TokenTree`'s span.
    pub fn span(&self) -> Span {
        match self {
            TokenTree::Token(token, _) => token.span,
            TokenTree::Delimited(sp, ..) => sp.entire(),
        }
    }

    /// Create a `TokenTree::Token` with alone spacing.
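    ///
    /// For example (an illustrative sketch; `span` is any `Span` in scope),
    /// `add_comma` below builds the comma it suggests this way:
    ///
    /// ```ignore (illustrative)
    /// let comma = TokenTree::token_alone(token::Comma, span);
    /// ```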
    pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::Alone)
    }

    /// Create a `TokenTree::Token` with joint spacing.
    pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::Joint)
    }

    /// Create a `TokenTree::Token` with joint-hidden spacing.
    pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
    }

    pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
        match self {
            TokenTree::Token(token, spacing) => match token.uninterpolate() {
                Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
                Cow::Borrowed(_) => Cow::Borrowed(self),
            },
            _ => Cow::Borrowed(self),
        }
    }
}

impl<CTX> HashStable<CTX> for TokenStream
where
    CTX: crate::HashStableContext,
{
    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
        for sub_tt in self.iter() {
            sub_tt.hash_stable(hcx, hasher);
        }
    }
}

/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
/// `AttrTokenStream` until it is needed.
#[derive(Clone)]
pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);

impl LazyAttrTokenStream {
    pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
    }

    pub fn new_pending(
        start_token: (Token, Spacing),
        cursor_snapshot: TokenCursor,
        num_calls: u32,
        break_last_token: u32,
        node_replacements: ThinVec<NodeReplacement>,
    ) -> LazyAttrTokenStream {
        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
            start_token,
            cursor_snapshot,
            num_calls,
            break_last_token,
            node_replacements,
        }))
    }

    pub fn to_attr_token_stream(&self) -> AttrTokenStream {
        self.0.to_attr_token_stream()
    }
}

impl fmt::Debug for LazyAttrTokenStream {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "LazyAttrTokenStream({:?})", self.to_attr_token_stream())
    }
}

impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
    fn encode(&self, _s: &mut S) {
        panic!("Attempted to encode LazyAttrTokenStream");
    }
}

impl<D: SpanDecoder> Decodable<D> for LazyAttrTokenStream {
    fn decode(_d: &mut D) -> Self {
        panic!("Attempted to decode LazyAttrTokenStream");
    }
}

impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
    fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
        panic!("Attempted to compute stable hash for LazyAttrTokenStream");
    }
}

/// A token range within a `Parser`'s full token stream.
#[derive(Clone, Debug)]
pub struct ParserRange(pub Range<u32>);

/// A token range within an individual AST node's (lazy) token stream, i.e.
/// relative to that node's first token. Distinct from `ParserRange` so the two
/// kinds of range can't be mixed up.
#[derive(Clone, Debug)]
pub struct NodeRange(pub Range<u32>);

/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
/// (replacement) or be replaced by nothing (deletion). This is used in two
/// places during token collection.
///
/// 1. Replacement. During the parsing of an AST node that may have a
///    `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
///    or `#[cfg_attr]`, we replace the entire inner AST node with
///    `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
///    `AttrTokenStream`.
///
/// 2. Deletion. We delete inner attributes from all collected token streams,
///    and instead track them through the `attrs` field on the AST node. This
///    lets us manipulate them similarly to outer attributes. When we create a
///    `TokenStream`, the inner attributes are inserted into the proper place
///    in the token stream.
///
/// Each replacement starts off in `ParserReplacement` form but is converted to
/// `NodeReplacement` form when it is attached to a single AST node, via
/// `LazyAttrTokenStreamImpl`.
pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);

/// See the comment on `ParserReplacement`.
pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);

impl NodeRange {
    // Converts a range within a parser's tokens to a range within a
    // node's tokens beginning at `start_pos`.
    //
    // For example, imagine a parser with 50 tokens in its token stream, a
    // function that spans `ParserRange(20..40)` and an inner attribute within
    // that function that spans `ParserRange(30..35)`. We would find the inner
    // attribute's range within the function's tokens by subtracting 20, which
    // is the position of the function's start token. This gives
    // `NodeRange(10..15)`.
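    //
    // In code (an illustrative restatement of the example above):
    //
    //   assert_eq!(NodeRange::new(ParserRange(30..35), 20).0, 10..15);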
    pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
        assert!(!parser_range.is_empty());
        assert!(parser_range.start >= start_pos);
        NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
    }
}

enum LazyAttrTokenStreamInner {
    // The token stream has already been produced.
    Direct(AttrTokenStream),

    // From a value of this type we can reconstruct the `TokenStream` seen by
    // the `f` callback passed to a call to `Parser::collect_tokens`, by
    // replaying the getting of the tokens. This saves us producing a
    // `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
    // argument that is never passed to a proc macro. In practice, token stream
    // creation happens rarely compared to calls to `collect_tokens` (see some
    // statistics in #78736) so we are doing as little up-front work as
    // possible.
    //
    // This also makes `Parser` very cheap to clone, since there is no
    // intermediate collection buffer to clone.
    Pending {
        start_token: (Token, Spacing),
        cursor_snapshot: TokenCursor,
        num_calls: u32,
        break_last_token: u32,
        node_replacements: ThinVec<NodeReplacement>,
    },
}

impl LazyAttrTokenStreamInner {
    fn to_attr_token_stream(&self) -> AttrTokenStream {
        match self {
            LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
            LazyAttrTokenStreamInner::Pending {
                start_token,
                cursor_snapshot,
                num_calls,
                break_last_token,
                node_replacements,
            } => {
                // The token produced by the final call to `{,inlined_}next` was not
                // actually consumed by the callback. The combination of chaining the
                // initial token and using `take` produces the desired result - we
                // produce an empty `TokenStream` if no calls were made, and omit the
                // final token otherwise.
                let mut cursor_snapshot = cursor_snapshot.clone();
                let tokens = iter::once(FlatToken::Token(*start_token))
                    .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
                    .take(*num_calls as usize);

                if node_replacements.is_empty() {
                    make_attr_token_stream(tokens, *break_last_token)
                } else {
                    let mut tokens: Vec<_> = tokens.collect();
                    let mut node_replacements = node_replacements.to_vec();
                    node_replacements.sort_by_key(|(range, _)| range.0.start);

                    #[cfg(debug_assertions)]
                    for [(node_range, tokens), (next_node_range, next_tokens)] in
                        node_replacements.array_windows()
                    {
                        assert!(
                            node_range.0.end <= next_node_range.0.start
                                || node_range.0.end >= next_node_range.0.end,
                            "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
                            node_range,
                            tokens,
                            next_node_range,
                            next_tokens,
                        );
                    }

                    // Process the replace ranges, starting from the highest start
                    // position and working our way back. If we have tokens like:
                    //
                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
                    //
                    // Then we will generate replace ranges for both
                    // the `#[cfg(FALSE)] field: bool` and the entire
                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
                    //
                    // By starting processing from the replace range with the greatest
                    // start position, we ensure that any (outer) replace range which
                    // encloses another (inner) replace range will fully overwrite the
                    // inner range's replacement.
                    for (node_range, target) in node_replacements.into_iter().rev() {
                        assert!(
                            !node_range.0.is_empty(),
                            "Cannot replace an empty node range: {:?}",
                            node_range.0
                        );

                        // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
                        // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
                        // keeps the total length of `tokens` constant throughout the replacement
                        // process, allowing us to do all replacements without adjusting indices.
                        let target_len = target.is_some() as usize;
                        tokens.splice(
                            (node_range.0.start as usize)..(node_range.0.end as usize),
                            target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
                                iter::repeat(FlatToken::Empty)
                                    .take(node_range.0.len() - target_len),
                            ),
                        );
                    }
                    make_attr_token_stream(tokens.into_iter(), *break_last_token)
                }
            }
        }
    }
}

/// A helper type used when building an `AttrTokenStream` from
/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
/// is then 'parsed' to build up an `AttrTokenStream` with nested
/// `AttrTokenTree::Delimited` tokens.
#[derive(Debug, Clone)]
enum FlatToken {
    /// A token - this holds both delimiter (e.g. '{' and '}')
    /// and non-delimiter tokens
    Token((Token, Spacing)),
    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
    /// directly into the constructed `AttrTokenStream` as an
    /// `AttrTokenTree::AttrsTarget`.
    AttrsTarget(AttrsTarget),
    /// A special 'empty' token that is ignored during the conversion
    /// to an `AttrTokenStream`. This is used to simplify the
    /// handling of replace ranges.
    Empty,
}

/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
/// information about the tokens for attribute targets. This is used
/// during expansion to perform early cfg-expansion, and to process attributes
/// during proc-macro invocations.
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);

/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
/// close delims.
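///
/// For example (an illustrative sketch, eliding spans and spacing), the flat
/// sequence `(`, `a`, `)`, `b` is rebuilt into a nested tree:
///
/// ```text
/// [Open(Paren), Token(a), Close(Paren), Token(b)]
///     =>  [Delimited(Paren, [Token(a)]), Token(b)]
/// ```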
fn make_attr_token_stream(
    iter: impl Iterator<Item = FlatToken>,
    break_last_token: u32,
) -> AttrTokenStream {
    #[derive(Debug)]
    struct FrameData {
        // This is `None` for the first frame, `Some` for all others.
        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
        inner: Vec<AttrTokenTree>,
    }
    // The stack always has at least one element. Storing it separately makes for shorter code.
    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
    let mut stack_rest = vec![];
    for flat_token in iter {
        match flat_token {
            FlatToken::Token((token @ Token { kind, span }, spacing)) => {
                if let Some(delim) = kind.open_delim() {
                    stack_rest.push(mem::replace(
                        &mut stack_top,
                        FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
                    ));
                } else if let Some(delim) = kind.close_delim() {
                    let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                    let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
                    assert!(
                        open_delim.eq_ignoring_invisible_origin(&delim),
                        "Mismatched open/close delims: open={open_delim:?} close={span:?}"
                    );
                    let dspan = DelimSpan::from_pair(open_sp, span);
                    let dspacing = DelimSpacing::new(open_spacing, spacing);
                    let stream = AttrTokenStream::new(frame_data.inner);
                    let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
                    stack_top.inner.push(delimited);
                } else {
                    stack_top.inner.push(AttrTokenTree::Token(token, spacing))
                }
            }
            FlatToken::AttrsTarget(target) => {
                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
            }
            FlatToken::Empty => {}
        }
    }

    if break_last_token > 0 {
        let last_token = stack_top.inner.pop().unwrap();
        if let AttrTokenTree::Token(last_token, spacing) = last_token {
            let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();

            // Tokens are always ASCII chars, so we can use byte arithmetic here.
            let mut first_span = last_token.span.shrink_to_lo();
            first_span =
                first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));

            stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
        } else {
            panic!("Unexpected last token {last_token:?}")
        }
    }
    AttrTokenStream::new(stack_top.inner)
}

/// Like `TokenTree`, but for `AttrTokenStream`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub enum AttrTokenTree {
    Token(Token, Spacing),
    Delimited(DelimSpan, DelimSpacing, Delimiter, AttrTokenStream),
    /// Stores the attributes for an attribute target,
    /// along with the tokens for that attribute target.
    /// See `AttrsTarget` for more information
    AttrsTarget(AttrsTarget),
}

impl AttrTokenStream {
    pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
        AttrTokenStream(Arc::new(tokens))
    }

    /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`. During
    /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a
    /// `TokenStream`, as described in the comment on
    /// `attrs_and_tokens_to_token_trees`.
    pub fn to_token_trees(&self) -> Vec<TokenTree> {
        let mut res = Vec::with_capacity(self.0.len());
        for tree in self.0.iter() {
            match tree {
                AttrTokenTree::Token(inner, spacing) => {
                    res.push(TokenTree::Token(inner.clone(), *spacing));
                }
                AttrTokenTree::Delimited(span, spacing, delim, stream) => {
                    res.push(TokenTree::Delimited(
                        *span,
                        *spacing,
                        *delim,
                        TokenStream::new(stream.to_token_trees()),
                    ))
                }
                AttrTokenTree::AttrsTarget(target) => {
                    attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res);
                }
            }
        }
        res
    }
}

// Converts multiple attributes and the tokens for a target AST node into token trees, and appends
// them to `res`.
//
// Example: if the AST node is "fn f() { blah(); }", then:
// - Simple if no attributes are present, e.g. "fn f() { blah(); }"
// - Simple if only outer attributes are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }"
// - Trickier if inner attributes are present, because they must be moved within the AST node's
//   tokens, e.g. "#[outer] fn f() { #![inner] blah() }"
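//
// A sketch of the tricky case (illustrative): the outer attribute's token
// trees are emitted before the node's tokens, while the inner attribute's
// token trees are re-inserted at the front of the node's rightmost braced
// group, i.e. just after the `{` of the function body.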
fn attrs_and_tokens_to_token_trees(
    attrs: &[Attribute],
    target_tokens: &LazyAttrTokenStream,
    res: &mut Vec<TokenTree>,
) {
    let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
    let (outer_attrs, inner_attrs) = attrs.split_at(idx);

    // Add outer attribute tokens.
    for attr in outer_attrs {
        res.extend(attr.token_trees());
    }

    // Add target AST node tokens.
    res.extend(target_tokens.to_attr_token_stream().to_token_trees());

    // Insert inner attribute tokens.
    if !inner_attrs.is_empty() {
        let found = insert_inner_attrs(inner_attrs, res);
        assert!(found, "Failed to find trailing delimited group in: {res:?}");
    }

    // Inner attributes are only supported on blocks, functions, impls, and
    // modules. All of these have their inner attributes placed at the
    // beginning of the rightmost outermost braced group:
    // e.g. `fn foo() { #![my_attr] }`. (Note: the braces may be within
    // invisible delimiters.)
    //
    // Therefore, we can insert them back into the right location without
    // needing to do any extra position tracking.
    //
    // Note: Outline modules are an exception - they can have attributes like
    // `#![my_attr]` at the start of a file. Support for custom attributes in
    // this position is not properly implemented - we always synthesize fake
    // tokens, so we never reach this code.
    fn insert_inner_attrs(inner_attrs: &[Attribute], tts: &mut Vec<TokenTree>) -> bool {
        for tree in tts.iter_mut().rev() {
            if let TokenTree::Delimited(span, spacing, Delimiter::Brace, stream) = tree {
                // Found it: the rightmost, outermost braced group.
                let mut tts = vec![];
                for inner_attr in inner_attrs {
                    tts.extend(inner_attr.token_trees());
                }
                tts.extend(stream.0.iter().cloned());
                let stream = TokenStream::new(tts);
                *tree = TokenTree::Delimited(*span, *spacing, Delimiter::Brace, stream);
                return true;
            } else if let TokenTree::Delimited(span, spacing, Delimiter::Invisible(src), stream) =
                tree
            {
                // Recurse inside invisible delimiters.
                let mut vec: Vec<_> = stream.iter().cloned().collect();
                if insert_inner_attrs(inner_attrs, &mut vec) {
                    *tree = TokenTree::Delimited(
                        *span,
                        *spacing,
                        Delimiter::Invisible(*src),
                        TokenStream::new(vec),
                    );
                    return true;
                }
            }
        }
        false
    }
}

/// Stores the tokens for an attribute target, along
/// with its attributes.
///
/// This is constructed during parsing when we need to capture
/// tokens, for `cfg` and `cfg_attr` attributes.
///
/// For example, `#[cfg(FALSE)] struct Foo {}` would
/// have an `attrs` field containing the `#[cfg(FALSE)]` attr,
/// and a `tokens` field storing the (unparsed) tokens `struct Foo {}`
///
/// The `cfg`/`cfg_attr` processing occurs in
/// `StripUnconfigured::configure_tokens`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub struct AttrsTarget {
    /// Attributes, both outer and inner.
    /// These are stored in the original order that they were parsed in.
    pub attrs: AttrVec,
    /// The underlying tokens for the attribute target that `attrs`
    /// are applied to
    pub tokens: LazyAttrTokenStream,
}

/// A `TokenStream` is an abstract sequence of tokens, organized into [`TokenTree`]s.
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct TokenStream(pub(crate) Arc<Vec<TokenTree>>);

/// Indicates whether a token can join with the following token to form a
/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
/// guide pretty-printing, which is where the `JointHidden` value (which isn't
/// part of `proc_macro::Spacing`) comes in useful.
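///
/// For example (an illustrative sketch): when `desugared_tts` below emits the
/// `#!` prefix of an inner doc attribute, the `#` gets `Joint` (it is
/// followed by the punctuation token `!`), while the `!` gets `JointHidden`
/// (it is followed by a `[...]` delimiter, which is not punctuation), so the
/// pair pretty-prints as `#!` with no space in between.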
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Spacing {
    /// The token cannot join with the following token to form a compound
    /// token.
    ///
    /// In token streams parsed from source code, the compiler will use `Alone`
    /// for any token immediately followed by whitespace, a non-doc comment, or
    /// EOF.
    ///
    /// When constructing token streams within the compiler, use this for each
    /// token that (a) should be pretty-printed with a space after it, or (b)
    /// is the last token in the stream. (In the latter case the choice of
    /// spacing doesn't matter because it is never used for the last token. We
    /// arbitrarily use `Alone`.)
    ///
    /// Converts to `proc_macro::Spacing::Alone`, and
    /// `proc_macro::Spacing::Alone` converts back to this.
    Alone,

    /// The token can join with the following token to form a compound token.
    ///
    /// In token streams parsed from source code, the compiler will use `Joint`
    /// for any token immediately followed by punctuation (as determined by
    /// `Token::is_punct`).
    ///
    /// When constructing token streams within the compiler, use this for each
    /// token that (a) should be pretty-printed without a space after it, and
    /// (b) is followed by a punctuation token.
    ///
    /// Converts to `proc_macro::Spacing::Joint`, and
    /// `proc_macro::Spacing::Joint` converts back to this.
    Joint,

    /// The token can join with the following token to form a compound token,
    /// but this will not be visible at the proc macro level. (This is what the
    /// `Hidden` means; see below.)
    ///
    /// In token streams parsed from source code, the compiler will use
    /// `JointHidden` for any token immediately followed by anything not
    /// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
    /// literal, delimiter, or doc comment.
    ///
    /// When constructing token streams, use this for each token that (a)
    /// should be pretty-printed without a space after it, and (b) is followed
    /// by a non-punctuation token.
    ///
    /// Converts to `proc_macro::Spacing::Alone`, but
    /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
    /// Because of that, pretty-printing of `TokenStream`s produced by proc
    /// macros is unavoidably uglier (with more whitespace between tokens) than
    /// pretty-printing of `TokenStream`s produced by other means (i.e. parsed
    /// source code, internally constructed token streams, and token streams
    /// produced by declarative macros).
    JointHidden,
}

impl TokenStream {
    /// Given a `TokenStream` containing only two arguments, return a new
    /// `TokenStream` that separates them with a comma, for use in diagnostic
    /// suggestions.
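    ///
    /// For example (an illustrative sketch; assume `stream` holds the
    /// argument tokens of `foo!(a b)`):
    ///
    /// ```ignore (illustrative)
    /// if let Some((with_comma, comma_span)) = stream.add_comma() {
    ///     // `with_comma` holds the tokens of `a, b`; `comma_span` points at
    ///     // the position where the comma should be suggested.
    /// }
    /// ```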
    pub fn add_comma(&self) -> Option<(TokenStream, Span)> {
        // Used to suggest if a user writes `foo!(a b);`
        let mut suggestion = None;
        let mut iter = self.0.iter().enumerate().peekable();
        while let Some((pos, ts)) = iter.next() {
            if let Some((_, next)) = iter.peek() {
                let sp = match (&ts, &next) {
                    (_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
                    (
                        TokenTree::Token(token_left, Spacing::Alone),
                        TokenTree::Token(token_right, _),
                    ) if ((token_left.is_ident() && !token_left.is_reserved_ident())
                        || token_left.is_lit())
                        && ((token_right.is_ident() && !token_right.is_reserved_ident())
                            || token_right.is_lit()) =>
                    {
                        token_left.span
                    }
                    (TokenTree::Delimited(sp, ..), _) => sp.entire(),
                    _ => continue,
                };
                let sp = sp.shrink_to_hi();
                let comma = TokenTree::token_alone(token::Comma, sp);
                suggestion = Some((pos, comma, sp));
            }
        }
        if let Some((pos, comma, sp)) = suggestion {
            let mut new_stream = Vec::with_capacity(self.0.len() + 1);
            let parts = self.0.split_at(pos + 1);
            new_stream.extend_from_slice(parts.0);
            new_stream.push(comma);
            new_stream.extend_from_slice(parts.1);
            return Some((TokenStream::new(new_stream), sp));
        }
        None
    }
}

impl FromIterator<TokenTree> for TokenStream {
    fn from_iter<I: IntoIterator<Item = TokenTree>>(iter: I) -> Self {
        TokenStream::new(iter.into_iter().collect::<Vec<TokenTree>>())
    }
}

impl Eq for TokenStream {}

impl PartialEq<TokenStream> for TokenStream {
    fn eq(&self, other: &TokenStream) -> bool {
        self.iter().eq(other.iter())
    }
}

impl TokenStream {
    pub fn new(tts: Vec<TokenTree>) -> TokenStream {
        TokenStream(Arc::new(tts))
    }

    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    pub fn len(&self) -> usize {
        self.0.len()
    }

    pub fn get(&self, index: usize) -> Option<&TokenTree> {
        self.0.get(index)
    }

    pub fn iter(&self) -> TokenStreamIter<'_> {
        TokenStreamIter::new(self)
    }

    /// Compares two `TokenStream`s, checking equality without regarding span information.
    pub fn eq_unspanned(&self, other: &TokenStream) -> bool {
        let mut iter1 = self.iter();
        let mut iter2 = other.iter();
        for (tt1, tt2) in iter::zip(&mut iter1, &mut iter2) {
            if !tt1.eq_unspanned(tt2) {
                return false;
            }
        }
        iter1.next().is_none() && iter2.next().is_none()
    }

    /// Create a token stream containing a single token with alone spacing. The
    /// spacing used for the final token in a constructed stream doesn't matter
    /// because it's never used. In practice we arbitrarily use
    /// `Spacing::Alone`.
    pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
        TokenStream::new(vec![TokenTree::token_alone(kind, span)])
    }

    pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream {
        let tokens =
            node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node));
        let mut tts = vec![];
        attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts);
        TokenStream::new(tts)
    }

    // If `vec` is not empty, try to glue `tt` onto its last token. The return
    // value indicates if gluing took place.
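    //
    // For example (an illustrative sketch): if `vec` ends with a `<` token
    // marked `Joint` and `tt` is `=`, then `Token::glue` combines them into a
    // single `<=` token, which overwrites the `<` at the end of `vec`.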
    fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
        if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
            && let TokenTree::Token(tok, spacing) = tt
            && let Some(glued_tok) = last_tok.glue(tok)
        {
            // ...then overwrite the last token tree in `vec` with the
            // glued token, and skip the first token tree from `stream`.
            *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
            true
        } else {
            false
        }
    }

    /// Push `tt` onto the end of the stream, possibly gluing it to the last
    /// token. Uses `make_mut` to maximize efficiency.
    pub fn push_tree(&mut self, tt: TokenTree) {
        let vec_mut = Arc::make_mut(&mut self.0);

        if Self::try_glue_to_last(vec_mut, &tt) {
            // nothing else to do
        } else {
            vec_mut.push(tt);
        }
    }

    /// Push `stream` onto the end of the stream, possibly gluing the first
    /// token tree to the last token. (No other token trees will be glued.)
    /// Uses `make_mut` to maximize efficiency.
    pub fn push_stream(&mut self, stream: TokenStream) {
        let vec_mut = Arc::make_mut(&mut self.0);

        let stream_iter = stream.0.iter().cloned();

        if let Some(first) = stream.0.first()
            && Self::try_glue_to_last(vec_mut, first)
        {
            // Now skip the first token tree from `stream`.
            vec_mut.extend(stream_iter.skip(1));
        } else {
            // Append all of `stream`.
            vec_mut.extend(stream_iter);
        }
    }

    pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
        self.0.chunks(chunk_size)
    }

    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
    /// r"foo"]`. Modifies the `TokenStream` via `Arc::make_mut`, but as little
    /// as possible.
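    ///
    /// For example (an illustrative sketch of the resulting token trees,
    /// eliding spans):
    ///
    /// ```text
    /// /// foo   =>   # [doc = r"foo"]
    /// //! foo   =>   # ! [doc = r"foo"]
    /// ```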
    pub fn desugar_doc_comments(&mut self) {
        if let Some(desugared_stream) = desugar_inner(self.clone()) {
            *self = desugared_stream;
        }

        // The return value is `None` if nothing in `stream` changed.
        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
            let mut i = 0;
            let mut modified = false;
            while let Some(tt) = stream.0.get(i) {
                match tt {
                    &TokenTree::Token(
                        Token { kind: token::DocComment(_, attr_style, data), span },
                        _spacing,
                    ) => {
                        let desugared = desugared_tts(attr_style, data, span);
                        let desugared_len = desugared.len();
                        Arc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
                        modified = true;
                        i += desugared_len;
                    }

                    &TokenTree::Token(..) => i += 1,

                    &TokenTree::Delimited(sp, spacing, delim, ref delim_stream) => {
                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
                            let new_tt =
                                TokenTree::Delimited(sp, spacing, delim, desugared_delim_stream);
                            Arc::make_mut(&mut stream.0)[i] = new_tt;
                            modified = true;
                        }
                        i += 1;
                    }
                }
            }
            if modified { Some(stream) } else { None }
        }

        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
            // required to wrap the text. E.g.
            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
            let mut num_of_hashes = 0;
            let mut count = 0;
            for ch in data.as_str().chars() {
                count = match ch {
                    '"' => 1,
                    '#' if count > 0 => count + 1,
                    _ => 0,
                };
                num_of_hashes = cmp::max(num_of_hashes, count);
            }

            // `/// foo` becomes `[doc = r"foo"]`.
            let delim_span = DelimSpan::from_single(span);
            let body = TokenTree::Delimited(
                delim_span,
                DelimSpacing::new(Spacing::JointHidden, Spacing::Alone),
                Delimiter::Bracket,
                [
                    TokenTree::token_alone(token::Ident(sym::doc, token::IdentIsRaw::No), span),
                    TokenTree::token_alone(token::Eq, span),
                    TokenTree::token_alone(
                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
                        span,
                    ),
                ]
                .into_iter()
                .collect::<TokenStream>(),
            );

            if attr_style == AttrStyle::Inner {
                vec![
                    TokenTree::token_joint(token::Pound, span),
                    TokenTree::token_joint_hidden(token::Bang, span),
                    body,
                ]
            } else {
                vec![TokenTree::token_joint_hidden(token::Pound, span), body]
            }
        }
    }
}

#[derive(Clone)]
pub struct TokenStreamIter<'t> {
    stream: &'t TokenStream,
    index: usize,
}

impl<'t> TokenStreamIter<'t> {
    fn new(stream: &'t TokenStream) -> Self {
        TokenStreamIter { stream, index: 0 }
    }

    // Peeking could be done via `Peekable`, but most iterators need peeking,
    // and this is simple and avoids the need to use `peekable` and `Peekable`
    // at all the use sites.
    pub fn peek(&self) -> Option<&'t TokenTree> {
        self.stream.0.get(self.index)
    }
}

impl<'t> Iterator for TokenStreamIter<'t> {
    type Item = &'t TokenTree;

    fn next(&mut self) -> Option<&'t TokenTree> {
        self.stream.0.get(self.index).map(|tree| {
            self.index += 1;
            tree
        })
    }
}

#[derive(Clone, Debug)]
pub struct TokenTreeCursor {
    stream: TokenStream,
    /// Points to the current token tree in the stream. In `TokenCursor::curr`,
    /// this can be any token tree. In `TokenCursor::stack`, this is always a
    /// `TokenTree::Delimited`.
    index: usize,
}

impl TokenTreeCursor {
    #[inline]
    pub fn new(stream: TokenStream) -> Self {
        TokenTreeCursor { stream, index: 0 }
    }

    #[inline]
    pub fn curr(&self) -> Option<&TokenTree> {
        self.stream.get(self.index)
    }

    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
        self.stream.get(self.index + n)
    }

    #[inline]
    pub fn bump(&mut self) {
        self.index += 1;
    }
}

/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part.
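///
/// For example (an illustrative sketch), the stream for `a { b }` is emitted
/// as the linear sequence `a`, `{`, `b`, `}`, then `Eof`, with the delimiter
/// tokens materialized from the enclosing `Delimited` tree (unless the
/// delimiter kind is skipped; see the `delim.skip()` checks in
/// `inlined_next`).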
#[derive(Clone, Debug)]
pub struct TokenCursor {
    // Cursor for the current (innermost) token stream. The index within the
    // cursor can point to any token tree in the stream (or one past the end).
    // The delimiters for this token stream are found in `self.stack.last()`;
    // if that is `None` we are in the outermost token stream which never has
    // delimiters.
    pub curr: TokenTreeCursor,

    // Token streams surrounding the current one. The index within each cursor
    // always points to a `TokenTree::Delimited`.
    pub stack: Vec<TokenTreeCursor>,
}

impl TokenCursor {
    pub fn next(&mut self) -> (Token, Spacing) {
        self.inlined_next()
    }

    /// This always-inlined version should only be used on hot code paths.
    #[inline(always)]
    pub fn inlined_next(&mut self) -> (Token, Spacing) {
        loop {
            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
            // below can be removed.
            if let Some(tree) = self.curr.curr() {
                match tree {
                    &TokenTree::Token(token, spacing) => {
                        debug_assert!(!token.kind.is_delim());
                        let res = (token, spacing);
                        self.curr.bump();
                        return res;
                    }
                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
                        let trees = TokenTreeCursor::new(tts.clone());
                        self.stack.push(mem::replace(&mut self.curr, trees));
                        if !delim.skip() {
                            return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
                        }
                        // No open delimiter to return; continue on to the next iteration.
                    }
                };
            } else if let Some(parent) = self.stack.pop() {
                // We have exhausted this token stream. Move back to its parent token stream.
                let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
                    panic!("parent should be Delimited")
                };
                self.curr = parent;
                self.curr.bump(); // move past the `Delimited`
                if !delim.skip() {
                    return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
                }
                // No close delimiter to return; continue on to the next iteration.
            } else {
                // We have exhausted the outermost token stream. The use of
                // `Spacing::Alone` is arbitrary and immaterial, because the
                // `Eof` token's spacing is never used.
                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
            }
        }
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpan {
    pub open: Span,
    pub close: Span,
}

impl DelimSpan {
    pub fn from_single(sp: Span) -> Self {
        DelimSpan { open: sp, close: sp }
    }

    pub fn from_pair(open: Span, close: Span) -> Self {
        DelimSpan { open, close }
    }

    pub fn dummy() -> Self {
        Self::from_single(DUMMY_SP)
    }

    pub fn entire(self) -> Span {
        self.open.with_hi(self.close.hi())
    }
}

#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpacing {
    pub open: Spacing,
    pub close: Spacing,
}

impl DelimSpacing {
    pub fn new(open: Spacing, close: Spacing) -> DelimSpacing {
        DelimSpacing { open, close }
    }
}

// Some types are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
mod size_asserts {
    use rustc_data_structures::static_assert_size;

    use super::*;
    // tidy-alphabetical-start
    static_assert_size!(AttrTokenStream, 8);
    static_assert_size!(AttrTokenTree, 32);
    static_assert_size!(LazyAttrTokenStream, 8);
    static_assert_size!(LazyAttrTokenStreamInner, 88);
    static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
    static_assert_size!(TokenStream, 8);
    static_assert_size!(TokenTree, 32);
    // tidy-alphabetical-end
}