Skip to content

Commit 2b6a8f0

Browse files
ehuss authored and jasonwilliams committed
Syntax: More stringent escape handling. (#297)
This makes a few changes to strings and byte/char literals:
- Allow _ in unicode escapes.
- Reject byte/char literals with too many chars.
- Reject non-ascii in byte literals.
- Reject ascii escape with >0x7f.
- Reject bad escape sequences.

cc #284
1 parent e3ec047 commit 2b6a8f0

File tree

2 files changed

+151
-14
lines changed

2 files changed

+151
-14
lines changed

RustEnhanced.sublime-syntax

+50-14
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ scope: source.rust
88
variables:
99
identifier: '(?:[[:alpha:]][_[:alnum:]]*|_[_[:alnum:]]+)'
1010
escaped_byte: '\\([nrt0\"''\\]|x\h{2})'
11-
escaped_char: '\\([nrt0\"''\\]|x\h{2}|u\{\h{1,6}\})'
11+
escaped_char: '\\([nrt0\"''\\]|x[0-7]\h|u\{(?:\h_*){1,6}\})'
1212
int_suffixes: '[iu](?:8|16|32|64|128|size)'
1313
float_suffixes: 'f(32|64)'
1414
contexts:
@@ -905,22 +905,35 @@ contexts:
905905
- include: char
906906
- include: byte
907907

908-
escaped-byte:
909-
- match: '{{escaped_byte}}'
910-
scope: constant.character.escape.rust
911-
912908
byte:
913-
- match: "(b)(')(?=([^'\\\\]|{{escaped_byte}})')"
909+
- match: "(b)(')"
914910
captures:
915911
1: storage.type.string.rust
916912
2: punctuation.definition.string.begin.rust
917913
push:
918914
- meta_include_prototype: false
919915
- meta_scope: string.quoted.single.rust
920-
- match: \'
921-
scope: punctuation.definition.string.end.rust
916+
# ASCII except ', \, \n, \r or \t
917+
- match: '[\x00-\x08\x0b-\x0c\x0e-\x26\x28-\x5b\x5d-\x7f]'
918+
set: byte-tail
919+
# Don't mark entire file invalid while writing, even though this is
920+
# not valid syntax.
921+
- match: '\n'
922922
pop: true
923-
- include: escaped-byte
923+
- match: '{{escaped_byte}}'
924+
scope: constant.character.escape.rust
925+
set: byte-tail
926+
- match: ''
927+
set: byte-tail
928+
929+
byte-tail:
930+
- match: "'"
931+
scope: string.quoted.single.rust punctuation.definition.string.end.rust
932+
pop: true
933+
- match: '\n'
934+
pop: true
935+
- match: '.'
936+
scope: invalid.illegal.byte.rust
924937

925938
byte-string:
926939
- match: '(b)(")'
@@ -933,7 +946,10 @@ contexts:
933946
- match: '"'
934947
scope: punctuation.definition.string.end.rust
935948
pop: true
936-
- include: escaped-byte
949+
- match: '{{escaped_byte}}'
950+
scope: constant.character.escape.rust
951+
- match: '\\.'
952+
scope: invalid.illegal.character.escape.rust
937953

938954
raw-byte-string:
939955
- match: (br)(#*)"
@@ -950,16 +966,36 @@ contexts:
950966
escaped-char:
951967
- match: '{{escaped_char}}'
952968
scope: constant.character.escape.rust
969+
- match: '\\u{[^}]*}'
970+
scope: invalid.illegal.character.escape.rust
971+
- match: '\\.'
972+
scope: invalid.illegal.character.escape.rust
953973

954974
char:
955-
- match: "'(?=([^'\\\\]|{{escaped_char}})')"
975+
- match: "'"
956976
scope: punctuation.definition.string.begin.rust
957977
push:
958978
- meta_scope: string.quoted.single.rust
959-
- match: \'
960-
scope: punctuation.definition.string.end.rust
979+
- match: "[^'\\\\\n\r\t]"
980+
set: char-tail
981+
# Don't mark entire file invalid while writing, even though this is
982+
# not valid syntax.
983+
- match: '\n'
961984
pop: true
962-
- include: escaped-char
985+
- match: '{{escaped_char}}'
986+
scope: constant.character.escape.rust
987+
set: char-tail
988+
- match: ''
989+
set: char-tail
990+
991+
char-tail:
992+
- match: "'"
993+
scope: string.quoted.single.rust punctuation.definition.string.end.rust
994+
pop: true
995+
- match: '\n'
996+
pop: true
997+
- match: '.'
998+
scope: invalid.illegal.char.rust
963999

9641000
string:
9651001
- match: '"'

syntax_test_rust.rs

+101
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,107 @@ let raw_bytes = br#"This won't escape anything either \x01 \""#;
8484
// ^^ storage.type
8585
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ string.quoted.double - constant.character.escape
8686

87+
let b_simple = b'a';
88+
// ^^^^ string.quoted.single
89+
// ^ storage.type.string
90+
// ^ punctuation.definition.string.begin
91+
// ^ punctuation.definition.string.end
92+
// ^ punctuation.terminator
93+
let b_newline = b'\n';
94+
// ^^^^^ string.quoted.single
95+
// ^^ string.quoted.single constant.character.escape
96+
let b_nul = b'\0';
97+
// ^^ string.quoted.single constant.character.escape
98+
let b_back = b'\\';
99+
// ^^ string.quoted.single constant.character.escape
100+
let b_quote = b'\'';
101+
// ^^ string.quoted.single constant.character.escape
102+
let b_esc_nul = b'\x00';
103+
// ^^^^ string.quoted.single constant.character.escape
104+
let b_esc_255 = b'\xff';
105+
// ^^^^ string.quoted.single constant.character.escape
106+
let b_esc_inv = b'\a';
107+
// ^^ invalid.illegal.byte
108+
// ^ string.quoted.single punctuation.definition.string.end
109+
let b_inv_len = b'abc';
110+
// ^ string.quoted.single
111+
// ^^ invalid.illegal.byte
112+
// ^ string.quoted.single punctuation.definition.string.end
113+
let b_inv_uni = b'♥';
114+
// ^ invalid.illegal.byte
115+
// ^ string.quoted.single punctuation.definition.string.end
116+
let b_inv_empty = b'';
117+
// ^^^ string.quoted.single
118+
// ^ punctuation.definition.string.begin
119+
// ^ punctuation.definition.string.end
120+
let b_unclosed1 = b'
121+
// Avoid error on entire file.
122+
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ comment.line.double-slash - invalid - string
123+
124+
let bs_newline = b"abc\n";
125+
// ^^^^^^^^ string.quoted.double
126+
// ^ punctuation.definition.string.begin
127+
// ^^ constant.character.escape
128+
// ^ punctuation.definition.string.end
129+
// ^ punctuation.terminator
130+
let bs_nul = b"abc\0";
131+
// ^^ string.quoted.double constant.character.escape
132+
let bs_esc_nul = b"abc\x00";
133+
// ^^^^ string.quoted.double constant.character.escape
134+
let bs_esc_255 = b"abc\xff";
135+
// ^^^^ string.quoted.double constant.character.escape
136+
let bs_esc_inv = b"abc\a";
137+
// ^^ string.quoted.double invalid.illegal.character.escape
138+
// ^ string.quoted.double punctuation.definition.string.end - invalid
139+
140+
let char_newline = '\n';
141+
// ^^^^ string.quoted.single
142+
// ^ punctuation.definition.string.begin
143+
// ^^ constant.character.escape
144+
// ^ punctuation.definition.string.end
145+
// ^ punctuation.terminator
146+
let char_nul = '\0';
147+
// ^^ string.quoted.single constant.character.escape
148+
let char_extra_inv = 'ab';
149+
// ^ string.quoted.single
150+
// ^ invalid.illegal.char
151+
// ^ string.quoted.single punctuation.definition.string.end
152+
let char_ascii_esc_nul = '\x00';
153+
// ^^^^ string.quoted.single constant.character.escape
154+
let char_ascii_esc_127 = '\x7f';
155+
// ^^^^ string.quoted.single constant.character.escape
156+
let char_ascii_inv_255 = '\xff';
157+
// ^^^^ invalid.illegal.char
158+
let char_uni_esc = '\u{3b1}';
159+
// ^^^^^^^ string.quoted.single constant.character.escape
160+
let char_uni_esc_empty = '\u{}';
161+
// ^^^^ invalid.illegal.char
162+
let char_uni_esc_under_start = '\u{_1_}';
163+
// ^^^^^^^ invalid.illegal.char
164+
let char_uni_esc_under1 = '\u{1_}';
165+
// ^^^^^^ string.quoted.single constant.character.escape
166+
let char_uni_esc_under2 = '\u{1_2__3___}';
167+
// ^^^^^^^^^^^^^ string.quoted.single constant.character.escape
168+
let char_uni_esc_under3 = '\u{10__FFFF}';
169+
// ^^^^^^^^^^^^ string.quoted.single constant.character.escape
170+
let char_uni_esc_extra = '\u{1234567}';
171+
// ^^^^^^^^^^^ invalid.illegal.char
172+
173+
let s_ascii_inv_255 = "\xff";
174+
// ^^ string.quoted.double invalid.illegal.character.escape
175+
let s_uni_esc_empty = "\u{}";
176+
// ^^^^ string.quoted.double invalid.illegal.character.escape
177+
let s_uni_esc_under_start = "\u{_1_}";
178+
// ^^^^^^^ string.quoted.double invalid.illegal.character.escape
179+
let s_uni_esc_under1 = "\u{1_}";
180+
// ^^^^^^ string.quoted.double constant.character.escape
181+
let s_uni_esc_under2 = "\u{1_2__3___}";
182+
// ^^^^^^^^^^^^^ string.quoted.double constant.character.escape
183+
let s_uni_esc_under3 = "\u{10__FFFF}";
184+
// ^^^^^^^^^^^^ string.quoted.double constant.character.escape
185+
let s_uni_esc_extra = "\u{1234567}";
186+
// ^^^^^^^^^^^ string.quoted.double invalid.illegal.character.escape
187+
87188
0;
88189
// <- constant.numeric.integer.decimal
89190
1_000u32;

0 commit comments

Comments
 (0)