Skip to content

Commit f01858b

Browse files
authored
Rollup merge of rust-lang#63709 - matklad:decomposed-tokens, r=petrochenkov
Move token gluing to token stream parsing. Work towards rust-lang#63689: this moves token gluing from the lexer to the token tree layer. This is only a minimal step, but I like the negative diff here. r? @petrochenkov
2 parents ea88adc + 914e1f4 commit f01858b

File tree

6 files changed

+68
-209
lines changed

6 files changed

+68
-209
lines changed

src/librustc_lexer/src/lib.rs

+14-162
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,6 @@ pub enum TokenKind {
2323
Lifetime { starts_with_number: bool },
2424
Semi,
2525
Comma,
26-
DotDotDot,
27-
DotDotEq,
28-
DotDot,
2926
Dot,
3027
OpenParen,
3128
CloseParen,
@@ -37,41 +34,19 @@ pub enum TokenKind {
3734
Pound,
3835
Tilde,
3936
Question,
40-
ColonColon,
4137
Colon,
4238
Dollar,
43-
EqEq,
4439
Eq,
45-
FatArrow,
46-
Ne,
4740
Not,
48-
Le,
49-
LArrow,
5041
Lt,
51-
ShlEq,
52-
Shl,
53-
Ge,
5442
Gt,
55-
ShrEq,
56-
Shr,
57-
RArrow,
5843
Minus,
59-
MinusEq,
6044
And,
61-
AndAnd,
62-
AndEq,
6345
Or,
64-
OrOr,
65-
OrEq,
66-
PlusEq,
6746
Plus,
68-
StarEq,
6947
Star,
70-
SlashEq,
7148
Slash,
72-
CaretEq,
7349
Caret,
74-
PercentEq,
7550
Percent,
7651
Unknown,
7752
}
@@ -135,13 +110,7 @@ impl Cursor<'_> {
135110
'/' => match self.nth_char(0) {
136111
'/' => self.line_comment(),
137112
'*' => self.block_comment(),
138-
_ => {
139-
if self.eat_assign() {
140-
SlashEq
141-
} else {
142-
Slash
143-
}
144-
}
113+
_ => Slash,
145114
},
146115
c if character_properties::is_whitespace(c) => self.whitespace(),
147116
'r' => match (self.nth_char(0), self.nth_char(1)) {
@@ -199,22 +168,7 @@ impl Cursor<'_> {
199168
}
200169
';' => Semi,
201170
',' => Comma,
202-
'.' => {
203-
if self.nth_char(0) == '.' {
204-
self.bump();
205-
if self.nth_char(0) == '.' {
206-
self.bump();
207-
DotDotDot
208-
} else if self.nth_char(0) == '=' {
209-
self.bump();
210-
DotDotEq
211-
} else {
212-
DotDot
213-
}
214-
} else {
215-
Dot
216-
}
217-
}
171+
'.' => Dot,
218172
'(' => OpenParen,
219173
')' => CloseParen,
220174
'{' => OpenBrace,
@@ -225,112 +179,19 @@ impl Cursor<'_> {
225179
'#' => Pound,
226180
'~' => Tilde,
227181
'?' => Question,
228-
':' => {
229-
if self.nth_char(0) == ':' {
230-
self.bump();
231-
ColonColon
232-
} else {
233-
Colon
234-
}
235-
}
182+
':' => Colon,
236183
'$' => Dollar,
237-
'=' => {
238-
if self.nth_char(0) == '=' {
239-
self.bump();
240-
EqEq
241-
} else if self.nth_char(0) == '>' {
242-
self.bump();
243-
FatArrow
244-
} else {
245-
Eq
246-
}
247-
}
248-
'!' => {
249-
if self.nth_char(0) == '=' {
250-
self.bump();
251-
Ne
252-
} else {
253-
Not
254-
}
255-
}
256-
'<' => match self.nth_char(0) {
257-
'=' => {
258-
self.bump();
259-
Le
260-
}
261-
'<' => {
262-
self.bump();
263-
if self.eat_assign() { ShlEq } else { Shl }
264-
}
265-
'-' => {
266-
self.bump();
267-
LArrow
268-
}
269-
_ => Lt,
270-
},
271-
'>' => match self.nth_char(0) {
272-
'=' => {
273-
self.bump();
274-
Ge
275-
}
276-
'>' => {
277-
self.bump();
278-
if self.eat_assign() { ShrEq } else { Shr }
279-
}
280-
_ => Gt,
281-
},
282-
'-' => {
283-
if self.nth_char(0) == '>' {
284-
self.bump();
285-
RArrow
286-
} else {
287-
if self.eat_assign() { MinusEq } else { Minus }
288-
}
289-
}
290-
'&' => {
291-
if self.nth_char(0) == '&' {
292-
self.bump();
293-
AndAnd
294-
} else {
295-
if self.eat_assign() { AndEq } else { And }
296-
}
297-
}
298-
'|' => {
299-
if self.nth_char(0) == '|' {
300-
self.bump();
301-
OrOr
302-
} else {
303-
if self.eat_assign() { OrEq } else { Or }
304-
}
305-
}
306-
'+' => {
307-
if self.eat_assign() {
308-
PlusEq
309-
} else {
310-
Plus
311-
}
312-
}
313-
'*' => {
314-
if self.eat_assign() {
315-
StarEq
316-
} else {
317-
Star
318-
}
319-
}
320-
'^' => {
321-
if self.eat_assign() {
322-
CaretEq
323-
} else {
324-
Caret
325-
}
326-
}
327-
'%' => {
328-
if self.eat_assign() {
329-
PercentEq
330-
} else {
331-
Percent
332-
}
333-
}
184+
'=' => Eq,
185+
'!' => Not,
186+
'<' => Lt,
187+
'>' => Gt,
188+
'-' => Minus,
189+
'&' => And,
190+
'|' => Or,
191+
'+' => Plus,
192+
'*' => Star,
193+
'^' => Caret,
194+
'%' => Percent,
334195
'\'' => self.lifetime_or_char(),
335196
'"' => {
336197
let terminated = self.double_quoted_string();
@@ -643,15 +504,6 @@ impl Cursor<'_> {
643504
self.bump();
644505
}
645506
}
646-
647-
fn eat_assign(&mut self) -> bool {
648-
if self.nth_char(0) == '=' {
649-
self.bump();
650-
true
651-
} else {
652-
false
653-
}
654-
}
655507
}
656508

657509
pub mod character_properties {

src/libsyntax/parse/lexer/mod.rs

-25
Original file line number | Diff line number | Diff line change
@@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
273273
}
274274
rustc_lexer::TokenKind::Semi => token::Semi,
275275
rustc_lexer::TokenKind::Comma => token::Comma,
276-
rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
277-
rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
278-
rustc_lexer::TokenKind::DotDot => token::DotDot,
279276
rustc_lexer::TokenKind::Dot => token::Dot,
280277
rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
281278
rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
@@ -287,42 +284,20 @@ impl<'a> StringReader<'a> {
287284
rustc_lexer::TokenKind::Pound => token::Pound,
288285
rustc_lexer::TokenKind::Tilde => token::Tilde,
289286
rustc_lexer::TokenKind::Question => token::Question,
290-
rustc_lexer::TokenKind::ColonColon => token::ModSep,
291287
rustc_lexer::TokenKind::Colon => token::Colon,
292288
rustc_lexer::TokenKind::Dollar => token::Dollar,
293-
rustc_lexer::TokenKind::EqEq => token::EqEq,
294289
rustc_lexer::TokenKind::Eq => token::Eq,
295-
rustc_lexer::TokenKind::FatArrow => token::FatArrow,
296-
rustc_lexer::TokenKind::Ne => token::Ne,
297290
rustc_lexer::TokenKind::Not => token::Not,
298-
rustc_lexer::TokenKind::Le => token::Le,
299-
rustc_lexer::TokenKind::LArrow => token::LArrow,
300291
rustc_lexer::TokenKind::Lt => token::Lt,
301-
rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
302-
rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
303-
rustc_lexer::TokenKind::Ge => token::Ge,
304292
rustc_lexer::TokenKind::Gt => token::Gt,
305-
rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
306-
rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
307-
rustc_lexer::TokenKind::RArrow => token::RArrow,
308293
rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
309-
rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
310294
rustc_lexer::TokenKind::And => token::BinOp(token::And),
311-
rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
312-
rustc_lexer::TokenKind::AndAnd => token::AndAnd,
313295
rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
314-
rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
315-
rustc_lexer::TokenKind::OrOr => token::OrOr,
316296
rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
317-
rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
318297
rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
319-
rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
320298
rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
321-
rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
322299
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
323-
rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
324300
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
325-
rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),
326301

327302
rustc_lexer::TokenKind::Unknown => {
328303
let c = self.str_from(start).chars().next().unwrap();

src/libsyntax/parse/lexer/tests.rs

+20-12
Original file line number | Diff line number | Diff line change
@@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
7575
}
7676

7777
#[test]
78-
fn doublecolonparsing() {
78+
fn doublecolon_parsing() {
7979
with_default_globals(|| {
8080
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
8181
let sh = mk_sess(sm.clone());
82-
check_tokenization(setup(&sm, &sh, "a b".to_string()),
83-
vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
82+
check_tokenization(
83+
setup(&sm, &sh, "a b".to_string()),
84+
vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
85+
);
8486
})
8587
}
8688

8789
#[test]
88-
fn dcparsing_2() {
90+
fn doublecolon_parsing_2() {
8991
with_default_globals(|| {
9092
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
9193
let sh = mk_sess(sm.clone());
92-
check_tokenization(setup(&sm, &sh, "a::b".to_string()),
93-
vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
94+
check_tokenization(
95+
setup(&sm, &sh, "a::b".to_string()),
96+
vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
97+
);
9498
})
9599
}
96100

97101
#[test]
98-
fn dcparsing_3() {
102+
fn doublecolon_parsing_3() {
99103
with_default_globals(|| {
100104
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
101105
let sh = mk_sess(sm.clone());
102-
check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
103-
vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
106+
check_tokenization(
107+
setup(&sm, &sh, "a ::b".to_string()),
108+
vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
109+
);
104110
})
105111
}
106112

107113
#[test]
108-
fn dcparsing_4() {
114+
fn doublecolon_parsing_4() {
109115
with_default_globals(|| {
110116
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
111117
let sh = mk_sess(sm.clone());
112-
check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
113-
vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
118+
check_tokenization(
119+
setup(&sm, &sh, "a:: b".to_string()),
120+
vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
121+
);
114122
})
115123
}
116124

0 commit comments

Comments
 (0)