Commit 593fdd3
Rewrite collect_tokens implementations to use a flattened buffer
Instead of trying to collect tokens at each depth, we 'flatten' the stream as we go along, pushing open/close delimiters to our buffer just like regular tokens. Once capturing is complete, we reconstruct a nested `TokenTree::Delimited` structure, producing a normal `TokenStream`.

The reconstructed `TokenStream` is not created immediately - instead, it is produced on-demand by a closure (wrapped in a new `LazyTokenStream` type). This closure stores a clone of the original `TokenCursor`, plus a record of the number of calls to `next()/next_desugared()`. This is sufficient to reconstruct the tokenstream seen by the callback without storing any additional state. If the tokenstream is never used (e.g. when a captured `macro_rules!` argument is never passed to a proc macro), we never actually create a `TokenStream`.

This implementation has a number of advantages over the previous one:

* It is significantly simpler, with no edge cases around capturing the start/end of a delimited group.

* It can be easily extended to allow replacing tokens at an arbitrary 'depth' by just using `Vec::splice` at the proper position. This is important for PR rust-lang#76130, which requires us to track information about attributes along with tokens.

* The lazy approach to `TokenStream` construction allows us to easily parse an AST struct, and then decide after the fact whether we need a `TokenStream`. This will be useful when we start collecting tokens for `Attribute` - we can discard the `LazyTokenStream` if the parsed attribute doesn't need tokens (e.g. is a builtin attribute).

The performance impact seems to be negligible (see rust-lang#77250 (comment)). There is a small slowdown on a few benchmarks, but it only rises above 1% for incremental builds, where it represents a larger fraction of the much smaller instruction count. There is a ~1% speedup on a few other incremental benchmarks - my guess is that the speedups and slowdowns will usually cancel out in practice.
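
A minimal, self-contained sketch of the flattening idea (simplified stand-in types, not the actual rustc_parse implementation): open and close delimiters are pushed into a flat buffer just like ordinary tokens, and the nested structure is rebuilt afterwards with a stack.

    enum Tok {
        Open,        // an open delimiter, e.g. `(`
        Close,       // a close delimiter, e.g. `)`
        Other(char), // any ordinary token
    }

    #[derive(Debug, PartialEq)]
    enum Tree {
        Token(char),
        Delimited(Vec<Tree>), // models `TokenTree::Delimited`
    }

    /// Rebuild the nested tree from a flat buffer in which delimiters were
    /// recorded like regular tokens.
    fn rebuild(flat: &[Tok]) -> Vec<Tree> {
        // A stack of partially-built groups; the bottom entry is the top level.
        let mut stack: Vec<Vec<Tree>> = vec![Vec::new()];
        for tok in flat {
            match tok {
                Tok::Open => stack.push(Vec::new()),
                Tok::Close => {
                    let group = stack.pop().expect("unbalanced delimiters");
                    stack.last_mut().unwrap().push(Tree::Delimited(group));
                }
                Tok::Other(c) => stack.last_mut().unwrap().push(Tree::Token(*c)),
            }
        }
        assert_eq!(stack.len(), 1, "unbalanced delimiters");
        stack.pop().unwrap()
    }

    fn main() {
        // `a ( b c ) d`, as captured flat during parsing:
        let flat = [
            Tok::Other('a'),
            Tok::Open,
            Tok::Other('b'),
            Tok::Other('c'),
            Tok::Close,
            Tok::Other('d'),
        ];
        assert_eq!(
            rebuild(&flat),
            vec![
                Tree::Token('a'),
                Tree::Delimited(vec![Tree::Token('b'), Tree::Token('c')]),
                Tree::Token('d'),
            ]
        );
    }
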
1 parent cb2462c commit 593fdd3

File tree

7 files changed: +254 -167 lines changed

compiler/rustc_ast/src/ast.rs (+10 -10)

@@ -24,7 +24,7 @@ pub use UnsafeSource::*;
 
 use crate::ptr::P;
 use crate::token::{self, CommentKind, DelimToken};
-use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
+use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::stack::ensure_sufficient_stack;
@@ -97,7 +97,7 @@ pub struct Path {
     /// The segments in the path: the things separated by `::`.
     /// Global paths begin with `kw::PathRoot`.
     pub segments: Vec<PathSegment>,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl PartialEq<Symbol> for Path {
@@ -535,7 +535,7 @@ pub struct Block {
     /// Distinguishes between `unsafe { ... }` and `{ ... }`.
     pub rules: BlockCheckMode,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 /// A match pattern.
@@ -546,7 +546,7 @@ pub struct Pat {
     pub id: NodeId,
     pub kind: PatKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Pat {
@@ -892,7 +892,7 @@ pub struct Stmt {
     pub id: NodeId,
     pub kind: StmtKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Stmt {
@@ -1040,7 +1040,7 @@ pub struct Expr {
     pub kind: ExprKind,
     pub span: Span,
    pub attrs: AttrVec,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 // `Expr` is used a lot. Make sure it doesn't unintentionally get bigger.
@@ -1835,7 +1835,7 @@ pub struct Ty {
     pub id: NodeId,
     pub kind: TyKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Clone for Ty {
@@ -2408,7 +2408,7 @@ impl<D: Decoder> rustc_serialize::Decodable<D> for AttrId {
 pub struct AttrItem {
     pub path: Path,
     pub args: MacArgs,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 /// A list of attributes.
@@ -2482,7 +2482,7 @@ pub enum CrateSugar {
 pub struct Visibility {
     pub kind: VisibilityKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 #[derive(Clone, Encodable, Decodable, Debug)]
@@ -2569,7 +2569,7 @@ pub struct Item<K = ItemKind> {
     ///
     /// Note that the tokens here do not include the outer attributes, but will
     /// include inner attributes.
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Item {

compiler/rustc_ast/src/tokenstream.rs (+73 -1)

@@ -16,8 +16,9 @@
 use crate::token::{self, DelimToken, Token, TokenKind};
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
-use rustc_data_structures::sync::Lrc;
+use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
 use rustc_span::{Span, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
@@ -119,6 +120,77 @@ where
     }
 }
 
+// A cloneable callback which produces a `TokenStream`. Each clone
+// of this should produce the same `TokenStream`
+pub trait CreateTokenStream: sync::Send + sync::Sync + FnOnce() -> TokenStream {
+    // Workaround for the fact that `Clone` is not object-safe
+    fn clone_it(&self) -> Box<dyn CreateTokenStream>;
+}
+
+impl<F: 'static + Clone + sync::Send + sync::Sync + FnOnce() -> TokenStream> CreateTokenStream
+    for F
+{
+    fn clone_it(&self) -> Box<dyn CreateTokenStream> {
+        Box::new(self.clone())
+    }
+}
+
+impl Clone for Box<dyn CreateTokenStream> {
+    fn clone(&self) -> Self {
+        let val: &(dyn CreateTokenStream) = &**self;
+        val.clone_it()
+    }
+}
+
+/// A lazy version of `TokenStream`, which may defer creation
+/// of an actual `TokenStream` until it is needed.
+pub type LazyTokenStream = Lrc<LazyTokenStreamInner>;
+
+#[derive(Clone)]
+pub enum LazyTokenStreamInner {
+    Lazy(Box<dyn CreateTokenStream>),
+    Ready(TokenStream),
+}
+
+impl std::fmt::Debug for LazyTokenStreamInner {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LazyTokenStreamInner::Lazy(..) => f.debug_struct("LazyTokenStream::Lazy").finish(),
+            LazyTokenStreamInner::Ready(..) => f.debug_struct("LazyTokenStream::Ready").finish(),
+        }
+    }
+}
+
+impl LazyTokenStreamInner {
+    pub fn into_token_stream(&self) -> TokenStream {
+        match self {
+            // Note that we do not cache this. If this ever becomes a performance
+            // problem, we should investigate wrapping `LazyTokenStreamInner`
+            // in a lock
+            LazyTokenStreamInner::Lazy(cb) => (cb.clone())(),
+            LazyTokenStreamInner::Ready(stream) => stream.clone(),
+        }
+    }
+}
+
+impl<S: Encoder> Encodable<S> for LazyTokenStreamInner {
+    fn encode(&self, _s: &mut S) -> Result<(), S::Error> {
+        panic!("Attempted to encode LazyTokenStream");
+    }
+}
+
+impl<D: Decoder> Decodable<D> for LazyTokenStreamInner {
+    fn decode(_d: &mut D) -> Result<Self, D::Error> {
+        panic!("Attempted to decode LazyTokenStream");
+    }
+}
+
+impl<CTX> HashStable<CTX> for LazyTokenStreamInner {
+    fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
+        panic!("Attempted to compute stable hash for LazyTokenStream");
+    }
+}
+
 /// A `TokenStream` is an abstract sequence of tokens, organized into `TokenTree`s.
 ///
 /// The goal is for procedural macros to work with `TokenStream`s and `TokenTree`s
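
The `clone_it` method above is a workaround worth spelling out: `Clone` cannot be a supertrait of an object-safe trait because `Clone::clone` returns `Self`. A self-contained demonstration of the same pattern, with `String` standing in for `TokenStream` and `Fn` for `FnOnce`:

    trait CreateString: Send + Sync + Fn() -> String {
        // Expose cloning through a method returning a boxed trait object,
        // since `Clone` itself cannot appear behind `dyn`.
        fn clone_it(&self) -> Box<dyn CreateString>;
    }

    // Blanket impl: any cloneable, thread-safe closure producing a `String`
    // gets the trait for free, mirroring the impl for `CreateTokenStream`.
    impl<F: 'static + Clone + Send + Sync + Fn() -> String> CreateString for F {
        fn clone_it(&self) -> Box<dyn CreateString> {
            Box::new(self.clone())
        }
    }

    impl Clone for Box<dyn CreateString> {
        fn clone(&self) -> Self {
            (**self).clone_it()
        }
    }

    fn main() {
        let name = String::from("world");
        let cb: Box<dyn CreateString> = Box::new(move || format!("hello {}", name));
        let cb2 = cb.clone(); // works even though `Clone` is not object-safe
        assert_eq!(cb(), cb2());
    }
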

compiler/rustc_parse/src/lib.rs (+13 -10)

@@ -8,7 +8,7 @@
 
 use rustc_ast as ast;
 use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Diagnostic, FatalError, Level, PResult};
@@ -248,29 +248,32 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
     // As a result, some AST nodes are annotated with the token stream they
     // came from. Here we attempt to extract these lossless token streams
     // before we fall back to the stringification.
+
+    let convert_tokens = |tokens: Option<LazyTokenStream>| tokens.map(|t| t.into_token_stream());
+
     let tokens = match *nt {
         Nonterminal::NtItem(ref item) => {
             prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
         }
-        Nonterminal::NtBlock(ref block) => block.tokens.clone(),
+        Nonterminal::NtBlock(ref block) => convert_tokens(block.tokens.clone()),
         Nonterminal::NtStmt(ref stmt) => {
             // FIXME: We currently only collect tokens for `:stmt`
             // matchers in `macro_rules!` macros. When we start collecting
             // tokens for attributes on statements, we will need to prepend
             // attributes here
-            stmt.tokens.clone()
+            convert_tokens(stmt.tokens.clone())
         }
-        Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
-        Nonterminal::NtTy(ref ty) => ty.tokens.clone(),
+        Nonterminal::NtPat(ref pat) => convert_tokens(pat.tokens.clone()),
+        Nonterminal::NtTy(ref ty) => convert_tokens(ty.tokens.clone()),
         Nonterminal::NtIdent(ident, is_raw) => {
            Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
        }
        Nonterminal::NtLifetime(ident) => {
            Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
        }
-        Nonterminal::NtMeta(ref attr) => attr.tokens.clone(),
-        Nonterminal::NtPath(ref path) => path.tokens.clone(),
-        Nonterminal::NtVis(ref vis) => vis.tokens.clone(),
+        Nonterminal::NtMeta(ref attr) => convert_tokens(attr.tokens.clone()),
+        Nonterminal::NtPath(ref path) => convert_tokens(path.tokens.clone()),
+        Nonterminal::NtVis(ref vis) => convert_tokens(vis.tokens.clone()),
         Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
         Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
             if expr.tokens.is_none() {
@@ -602,10 +605,10 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
 fn prepend_attrs(
     sess: &ParseSess,
     attrs: &[ast::Attribute],
-    tokens: Option<&tokenstream::TokenStream>,
+    tokens: Option<&tokenstream::LazyTokenStream>,
     span: rustc_span::Span,
 ) -> Option<tokenstream::TokenStream> {
-    let tokens = tokens?;
+    let tokens = tokens?.clone().into_token_stream();
     if attrs.is_empty() {
         return Some(tokens.clone());
     }
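
The payoff of threading `LazyTokenStream` all the way to this boundary is that the closure only ever runs when a nonterminal actually reaches a proc macro. A small self-contained model of that behavior (std types only; `String` stands in for `TokenStream`):

    use std::cell::Cell;
    use std::rc::Rc;

    fn main() {
        let runs = Rc::new(Cell::new(0));
        let counter = Rc::clone(&runs);
        // Stand-in for the `Lazy` variant: a deferred callback.
        let lazy: Box<dyn Fn() -> String> = Box::new(move || {
            counter.set(counter.get() + 1);
            String::from("a b c") // stand-in for reconstructing the stream
        });

        // Parsed and captured, but never handed to a proc macro:
        assert_eq!(runs.get(), 0); // no `TokenStream` was ever built

        // A proc-macro boundary (like `convert_tokens` above) forces it:
        let stream = lazy();
        assert_eq!((runs.get(), stream.as_str()), (1, "a b c"));
    }
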

compiler/rustc_parse/src/parser/attr.rs (+14 -1)

@@ -4,7 +4,7 @@ use rustc_ast::attr;
 use rustc_ast::token::{self, Nonterminal};
 use rustc_ast_pretty::pprust;
 use rustc_errors::{error_code, PResult};
-use rustc_span::Span;
+use rustc_span::{sym, Span};
 
 use tracing::debug;
 
@@ -302,3 +302,16 @@ impl<'a> Parser<'a> {
         Err(self.struct_span_err(self.token.span, &msg))
     }
 }
+
+pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool {
+    attrs.iter().any(|attr| {
+        if let Some(ident) = attr.ident() {
+            ident.name == sym::derive
+                // This might apply a custom attribute/derive
+                || ident.name == sym::cfg_attr
+                || !rustc_feature::is_builtin_attr_name(ident.name)
+        } else {
+            true
+        }
+    })
+}
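
For intuition, a simplified, runnable stand-in for this heuristic (string names instead of interned `Symbol`s, and a toy builtin list instead of `rustc_feature::is_builtin_attr_name`): tokens are kept whenever an attribute might apply a custom attribute or derive, and dropped only for known builtins.

    fn maybe_needs_tokens(attr_idents: &[Option<&str>]) -> bool {
        const BUILTINS: &[&str] = &["inline", "allow", "derive", "cfg_attr"]; // toy subset
        attr_idents.iter().any(|ident| match ident {
            Some(name) => {
                *name == "derive"
                    || *name == "cfg_attr"
                    || !BUILTINS.contains(name)
            }
            // Multi-segment paths like `#[rustfmt::skip]` have no single
            // ident: conservatively assume tokens are needed.
            None => true,
        })
    }

    fn main() {
        assert!(maybe_needs_tokens(&[Some("derive")])); // may invoke a custom derive
        assert!(maybe_needs_tokens(&[Some("my_attr")])); // not a builtin
        assert!(maybe_needs_tokens(&[None])); // e.g. `#[rustfmt::skip]`
        assert!(!maybe_needs_tokens(&[Some("inline")])); // builtin: tokens not needed
    }
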

compiler/rustc_parse/src/parser/expr.rs (+10 -9)

@@ -6,6 +6,7 @@ use crate::maybe_recover_from_interpolated_ty_qpath;
 
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Token, TokenKind};
+use rustc_ast::tokenstream::Spacing;
 use rustc_ast::util::classify;
 use rustc_ast::util::literal::LitError;
 use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
@@ -18,7 +19,6 @@ use rustc_span::source_map::{self, Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Pos};
 use std::mem;
-use tracing::debug;
 
 /// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
 /// dropped into the token stream, which happens while parsing the result of
@@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
     /// Parses a prefix-unary-operator expr.
     fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
         let attrs = self.parse_or_use_outer_attributes(attrs)?;
-        self.maybe_collect_tokens(!attrs.is_empty(), |this| {
+        self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
            let lo = this.token.span;
            // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
            let (hi, ex) = match this.token.uninterpolate().kind {
@@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
             assert!(suffix.is_none());
             let symbol = Symbol::intern(&i);
             self.token = Token::new(token::Ident(symbol, false), ident_span);
-            let next_token = Token::new(token::Dot, dot_span);
+            let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
             self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
         }
         // 1.2 | 1.2e3
@@ -902,12 +902,14 @@ impl<'a> Parser<'a> {
             };
             let symbol1 = Symbol::intern(&i1);
             self.token = Token::new(token::Ident(symbol1, false), ident1_span);
-            let next_token1 = Token::new(token::Dot, dot_span);
+            // This needs to be `Spacing::Alone` to prevent regressions.
+            // See issue #76399 and PR #76285 for more details
+            let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
             let base1 =
                 self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
             let symbol2 = Symbol::intern(&i2);
             let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
-            self.bump_with(next_token2); // `.`
+            self.bump_with((next_token2, self.token_spacing)); // `.`
             self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
         }
         // 1e+ | 1e- (recovered)
@@ -930,7 +932,7 @@ impl<'a> Parser<'a> {
         base: P<Expr>,
         field: Symbol,
         suffix: Option<Symbol>,
-        next_token: Option<Token>,
+        next_token: Option<(Token, Spacing)>,
     ) -> P<Expr> {
         match next_token {
             Some(next_token) => self.bump_with(next_token),
@@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {
 
     fn maybe_collect_tokens(
         &mut self,
-        has_outer_attrs: bool,
+        needs_tokens: bool,
         f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
     ) -> PResult<'a, P<Expr>> {
-        if has_outer_attrs {
+        if needs_tokens {
             let (mut expr, tokens) = self.collect_tokens(f)?;
-            debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
             expr.tokens = Some(tokens);
             Ok(expr)
         } else {
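
Background for the `Spacing` plumbing (my gloss, not stated in the commit): the flattened capture buffer records each token together with its spacing, so any token the parser synthesizes and re-injects - such as the pieces of a float literal split apart for `x.0.1` tuple-field access - must now carry an explicit `Spacing` as well, hence the `Option<(Token, Spacing)>` signatures above. A stand-in mirroring the assumed shape of `rustc_ast::tokenstream::Spacing`:

    // Stand-in (assumed shape): whether a token is immediately followed by
    // the next token (`Joint`) or separated from it (`Alone`).
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Spacing {
        Alone,
        Joint,
    }

    fn main() {
        // Per the comment in the diff above, the synthesized `.` is
        // deliberately `Alone` (see issue #76399 and PR #76285).
        let synthesized_dot = ('.', Spacing::Alone);
        assert_ne!(synthesized_dot.1, Spacing::Joint);
    }
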

compiler/rustc_parse/src/parser/item.rs (+3 -2)

@@ -116,15 +116,16 @@ impl<'a> Parser<'a> {
             Some(item.into_inner())
         });
 
+        let needs_tokens = super::attr::maybe_needs_tokens(&attrs);
+
         let mut unclosed_delims = vec![];
-        let has_attrs = !attrs.is_empty();
         let parse_item = |this: &mut Self| {
             let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
             unclosed_delims.append(&mut this.unclosed_delims);
             item
         };
 
-        let (mut item, tokens) = if has_attrs {
+        let (mut item, tokens) = if needs_tokens {
             let (item, tokens) = self.collect_tokens(parse_item)?;
             (item, Some(tokens))
         } else {
