Skip to content

Commit 79ed2da

Browse files
committed
Syntax: More stringent escape handling.
This makes a few changes to strings and byte/char literals:
- Allow `_` in unicode escapes.
- Reject byte/char literals with too many chars.
- Reject non-ASCII characters in byte literals.
- Reject ASCII escapes with a value >0x7f.
- Reject bad escape sequences.

cc rust-lang#284
1 parent 761796f commit 79ed2da

File tree

2 files changed

+151
-14
lines changed

2 files changed

+151
-14
lines changed

RustEnhanced.sublime-syntax

+50-14
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ scope: source.rust
88
variables:
99
identifier: '(?:[[:alpha:]][_[:alnum:]]*|_[_[:alnum:]]+)'
1010
escaped_byte: '\\([nrt0\"''\\]|x\h{2})'
11-
escaped_char: '\\([nrt0\"''\\]|x\h{2}|u\{\h{1,6}\})'
11+
escaped_char: '\\([nrt0\"''\\]|x[0-7]\h|u\{\h_*(?:\h_*){,5}\})'
1212
int_suffixes: '[iu](?:8|16|32|64|128|size)'
1313
float_suffixes: 'f(32|64)'
1414
contexts:
@@ -902,22 +902,35 @@ contexts:
902902
- include: char
903903
- include: byte
904904

905-
escaped-byte:
906-
- match: '{{escaped_byte}}'
907-
scope: constant.character.escape.rust
908-
909905
byte:
910-
- match: "(b)(')(?=([^'\\\\]|{{escaped_byte}})')"
906+
- match: "(b)(')"
911907
captures:
912908
1: storage.type.string.rust
913909
2: punctuation.definition.string.begin.rust
914910
push:
915911
- meta_include_prototype: false
916912
- meta_scope: string.quoted.single.rust
917-
- match: \'
918-
scope: punctuation.definition.string.end.rust
913+
# ASCII except ', \, \n, \r or \t
914+
- match: '[\x00-\x08\x0b-\x0c\x0e-\x26\x28-\x5b\x5d-\x7f]'
915+
set: byte-tail
916+
# Don't mark entire file invalid while writing, even though this is
917+
# not valid syntax.
918+
- match: '\n'
919919
pop: true
920-
- include: escaped-byte
920+
- match: '{{escaped_byte}}'
921+
scope: constant.character.escape.rust
922+
set: byte-tail
923+
- match: ''
924+
set: byte-tail
925+
926+
byte-tail:
927+
- match: "'"
928+
scope: string.quoted.single.rust punctuation.definition.string.end.rust
929+
pop: true
930+
- match: '\n'
931+
pop: true
932+
- match: '.'
933+
scope: invalid.illegal.byte.rust
921934

922935
byte-string:
923936
- match: '(b)(")'
@@ -930,7 +943,10 @@ contexts:
930943
- match: '"'
931944
scope: punctuation.definition.string.end.rust
932945
pop: true
933-
- include: escaped-byte
946+
- match: '{{escaped_byte}}'
947+
scope: constant.character.escape.rust
948+
- match: '\\.'
949+
scope: invalid.illegal.character.escape.rust
934950

935951
raw-byte-string:
936952
- match: (br)(#*)"
@@ -947,16 +963,36 @@ contexts:
947963
escaped-char:
948964
- match: '{{escaped_char}}'
949965
scope: constant.character.escape.rust
966+
- match: '\\u{[^}]*}'
967+
scope: invalid.illegal.character.escape.rust
968+
- match: '\\.'
969+
scope: invalid.illegal.character.escape.rust
950970

951971
char:
952-
- match: "'(?=([^'\\\\]|{{escaped_char}})')"
972+
- match: "'"
953973
scope: punctuation.definition.string.begin.rust
954974
push:
955975
- meta_scope: string.quoted.single.rust
956-
- match: \'
957-
scope: punctuation.definition.string.end.rust
976+
- match: "[^'\\\\\n\r\t]"
977+
set: char-tail
978+
# Don't mark entire file invalid while writing, even though this is
979+
# not valid syntax.
980+
- match: '\n'
958981
pop: true
959-
- include: escaped-char
982+
- match: '{{escaped_char}}'
983+
scope: constant.character.escape.rust
984+
set: char-tail
985+
- match: ''
986+
set: char-tail
987+
988+
char-tail:
989+
- match: "'"
990+
scope: string.quoted.single.rust punctuation.definition.string.end.rust
991+
pop: true
992+
- match: '\n'
993+
pop: true
994+
- match: '.'
995+
scope: invalid.illegal.char.rust
960996

961997
string:
962998
- match: '"'

syntax_test_rust.rs

+101
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,107 @@ let raw_bytes = br#"This won't escape anything either \x01 \""#;
6666
// ^^ storage.type
6767
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ string.quoted.double - constant.character.escape
6868

69+
let b_simple = b'a';
70+
// ^^^^ string.quoted.single
71+
// ^ storage.type.string
72+
// ^ punctuation.definition.string.begin
73+
// ^ punctuation.definition.string.end
74+
// ^ punctuation.terminator
75+
let b_newline = b'\n';
76+
// ^^^^^ string.quoted.single
77+
// ^^ string.quoted.single constant.character.escape
78+
let b_nul = b'\0';
79+
// ^^ string.quoted.single constant.character.escape
80+
let b_back = b'\\';
81+
// ^^ string.quoted.single constant.character.escape
82+
let b_quote = b'\'';
83+
// ^^ string.quoted.single constant.character.escape
84+
let b_esc_nul = b'\x00';
85+
// ^^^^ string.quoted.single constant.character.escape
86+
let b_esc_255 = b'\xff';
87+
// ^^^^ string.quoted.single constant.character.escape
88+
let b_esc_inv = b'\a';
89+
// ^^ invalid.illegal.byte
90+
// ^ string.quoted.single punctuation.definition.string.end
91+
let b_inv_len = b'abc';
92+
// ^ string.quoted.single
93+
// ^^ invalid.illegal.byte
94+
// ^ string.quoted.single punctuation.definition.string.end
95+
let b_inv_uni = b'♥';
96+
// ^ invalid.illegal.byte
97+
// ^ string.quoted.single punctuation.definition.string.end
98+
let b_inv_empty = b'';
99+
// ^^^ string.quoted.single
100+
// ^ punctuation.definition.string.begin
101+
// ^ punctuation.definition.string.end
102+
let b_unclosed1 = b'
103+
// Avoid error on entire file.
104+
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ comment.line.double-slash - invalid - string
105+
106+
let bs_newline = b"abc\n";
107+
// ^^^^^^^^ string.quoted.double
108+
// ^ punctuation.definition.string.begin
109+
// ^^ constant.character.escape
110+
// ^ punctuation.definition.string.end
111+
// ^ punctuation.terminator
112+
let bs_nul = b"abc\0";
113+
// ^^ string.quoted.double constant.character.escape
114+
let bs_esc_nul = b"abc\x00";
115+
// ^^^^ string.quoted.double constant.character.escape
116+
let bs_esc_255 = b"abc\xff";
117+
// ^^^^ string.quoted.double constant.character.escape
118+
let bs_esc_inv = b"abc\a";
119+
// ^^ string.quoted.double invalid.illegal.character.escape
120+
// ^ string.quoted.double punctuation.definition.string.end - invalid
121+
122+
let char_newline = '\n';
123+
// ^^^^ string.quoted.single
124+
// ^ punctuation.definition.string.begin
125+
// ^^ constant.character.escape
126+
// ^ punctuation.definition.string.end
127+
// ^ punctuation.terminator
128+
let char_nul = '\0';
129+
// ^^ string.quoted.single constant.character.escape
130+
let char_extra_inv = 'ab';
131+
// ^ string.quoted.single
132+
// ^ invalid.illegal.char
133+
// ^ string.quoted.single punctuation.definition.string.end
134+
let char_ascii_esc_nul = '\x00';
135+
// ^^^^ string.quoted.single constant.character.escape
136+
let char_ascii_esc_127 = '\x7f';
137+
// ^^^^ string.quoted.single constant.character.escape
138+
let char_ascii_inv_255 = '\xff';
139+
// ^^^^ invalid.illegal.char
140+
let char_uni_esc = '\u{3b1}';
141+
// ^^^^^^^ string.quoted.single constant.character.escape
142+
let char_uni_esc_empty = '\u{}';
143+
// ^^^^ invalid.illegal.char
144+
let char_uni_esc_under_start = '\u{_1_}';
145+
// ^^^^^^^ invalid.illegal.char
146+
let char_uni_esc_under1 = '\u{1_}';
147+
// ^^^^^^ string.quoted.single constant.character.escape
148+
let char_uni_esc_under2 = '\u{1_2__3___}';
149+
// ^^^^^^^^^^^^^ string.quoted.single constant.character.escape
150+
let char_uni_esc_under3 = '\u{10__FFFF}';
151+
// ^^^^^^^^^^^^ string.quoted.single constant.character.escape
152+
let char_uni_esc_extra = '\u{1234567}';
153+
// ^^^^^^^^^^^ invalid.illegal.char
154+
155+
let s_ascii_inv_255 = "\xff";
156+
// ^^ string.quoted.double invalid.illegal.character.escape
157+
let s_uni_esc_empty = "\u{}";
158+
// ^^^^ string.quoted.double invalid.illegal.character.escape
159+
let s_uni_esc_under_start = "\u{_1_}";
160+
// ^^^^^^^ string.quoted.double invalid.illegal.character.escape
161+
let s_uni_esc_under1 = "\u{1_}";
162+
// ^^^^^^ string.quoted.double constant.character.escape
163+
let s_uni_esc_under2 = "\u{1_2__3___}";
164+
// ^^^^^^^^^^^^^ string.quoted.double constant.character.escape
165+
let s_uni_esc_under3 = "\u{10__FFFF}";
166+
// ^^^^^^^^^^^^ string.quoted.double constant.character.escape
167+
let s_uni_esc_extra = "\u{1234567}";
168+
// ^^^^^^^^^^^ string.quoted.double invalid.illegal.character.escape
169+
69170
0;
70171
// <- constant.numeric.integer.decimal
71172
1_000u32;

0 commit comments

Comments
 (0)