Skip to content

Commit b9119ed

Browse files
committed
auto merge of #7114 : pnkfelix/rust/issue3961-fix-whitespace-detection, r=brson
r? (yes, the review request is back, now that I got it building against incom... I mean master!) (Attempting to port from orphaned pull request #6764.) Fix for #3961. Also includes a test case to illustrate the issues. (All of the entries that say "should align" should align with each other, and the four lines near the end that say "compare _" for _ in {A,B,C,D} should line up with each other.) Before applying this change set: -- the "(should align)" entries are all over the place, and the form/line feeding spaces are not cut out as one might or might not expect. -- the "compare B" and "compare D" lines do not match "compare A" and "compare C". (To be honest, it's hard to really say what the right behavior is here, and people who are expecting a particular behavior out of a pretty printer in these cases may well get burned.)
2 parents c989b79 + 876f6de commit b9119ed

File tree

3 files changed

+248
-14
lines changed

3 files changed

+248
-14
lines changed

src/libsyntax/parse/comments.rs

+23-14
Original file line numberDiff line numberDiff line change
@@ -197,26 +197,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
197197
}
198198
}
199199

200-
// FIXME #3961: This is not the right way to convert string byte
201-
// offsets to characters.
202-
fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
203-
let mut i: uint = begin;
204-
while i != end {
205-
if !is_whitespace(s[i] as char) { return false; } i += 1u;
200+
// Returns None if the first col chars of s contain a non-whitespace char.
201+
// Otherwise returns Some(k) where k is first char offset after that leading
202+
// whitespace. Note k may be outside bounds of s.
203+
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
204+
let len = s.len();
205+
let mut col = col.to_uint();
206+
let mut cursor: uint = 0;
207+
while col > 0 && cursor < len {
208+
let r: str::CharRange = s.char_range_at(cursor);
209+
if !r.ch.is_whitespace() {
210+
return None;
211+
}
212+
cursor = r.next;
213+
col -= 1;
206214
}
207-
return true;
215+
return Some(cursor);
208216
}
209217

210218
fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
211219
s: ~str, col: CharPos) {
212220
let len = s.len();
213-
// FIXME #3961: Doing bytewise comparison and slicing with CharPos
214-
let col = col.to_uint();
215-
let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
216-
if col < len {
217-
s.slice(col, len).to_owned()
218-
} else { ~"" }
219-
} else { s };
221+
let s1 = match all_whitespace(s, col) {
222+
Some(col) => {
223+
if col < len {
224+
s.slice(col, len).to_owned()
225+
} else { ~"" }
226+
}
227+
None => s,
228+
};
220229
debug!("pushing line: %s", s1);
221230
lines.push(s1);
222231
}
src/test/pretty/block-comment-wchar.pp

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// This is meant as a test case for Issue 3961.
12+
//
13+
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
14+
//
15+
// pp-exact:block-comment-wchar.pp
16+
fn f() {
17+
fn nested() {
18+
/*
19+
Spaced2
20+
*/
21+
/*
22+
Spaced10
23+
*/
24+
/*
25+
Tabbed8+2
26+
*/
27+
/*
28+
CR8+2
29+
*/
30+
}
31+
/*
32+
Spaced2: (prefixed so start of space aligns with comment)
33+
*/
34+
/*
35+
Tabbed2: (more indented b/c *start* of space will align with comment)
36+
*/
37+
/*
38+
Spaced6: (Alignment removed and realigning spaces inserted)
39+
*/
40+
/*
41+
Tabbed4+2: (Alignment removed and realigning spaces inserted)
42+
*/
43+
44+
/*
45+
VT4+2: (should align)
46+
*/
47+
/*
48+
FF4+2: (should align)
49+
*/
50+
/*
51+
CR4+2: (should align)
52+
*/
53+
/*
54+
// (NEL deliberately omitted)
55+
*/
56+
/*
57+
Ogham Space Mark 4+2: (should align)
58+
*/
59+
/*
60+
Mongolian Vowel Separator 4+2: (should align)
61+
*/
62+
/*
63+
Four-per-em space 4+2: (should align)
64+
*/
65+
66+
/*
67+
Mongolian Vowel Sep count 1: (should align)
68+
Mongolian Vowel Sep count 2: (should align)
69+
Mongolian Vowel Sep count 3: (should align)
70+
Mongolian Vowel Sep count 4: (should align)
71+
Mongolian Vowel Sep count 5: (should align)
72+
Mongolian Vowel Sep count 6: (should align)
73+
Mongolian Vowel Sep count 7: (should align)
74+
Mongolian Vowel Sep count 8: (should align)
75+
Mongolian Vowel Sep count 9: (should align)
76+
Mongolian Vowel Sep count A: (should align)
77+
Mongolian Vowel Sep count B: (should align)
78+
Mongolian Vowel Sep count C: (should align)
79+
Mongolian Vowel Sep count D: (should align)
80+
Mongolian Vowel Sep count E: (should align)
81+
Mongolian Vowel Sep count F: (should align)
82+
*/
83+
84+
85+
86+
/* */
87+
88+
/*
89+
Hello from offset 6
90+
Space 6+2: compare A
91+
Mongolian Vowel Separator 6+2: compare B
92+
*/
93+
94+
/*᠎*/
95+
96+
/*
97+
Hello from another offset 6 with wchars establishing column offset
98+
Space 6+2: compare C
99+
Mongolian Vowel Separator 6+2: compare D
100+
*/
101+
}
102+
103+
fn main() {
104+
// Taken from http://en.wikipedia.org/wiki/Whitespace_character
105+
let chars =
106+
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
107+
// '\x85', // for some reason Rust thinks NEL isn't whitespace
108+
'\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
109+
'\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
110+
'\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
111+
// <= bugs in pretty-printer?
112+
for chars.each |c| {
113+
let ws = c.is_whitespace();
114+
println(fmt!("%? %?" , c , ws));
115+
}
116+
}

src/test/pretty/block-comment-wchar.rs

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// This is meant as a test case for Issue 3961.
12+
//
13+
// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
14+
//
15+
// pp-exact:block-comment-wchar.pp
16+
fn f() {
17+
fn nested() {
18+
/*
19+
Spaced2
20+
*/
21+
/*
22+
Spaced10
23+
*/
24+
/*
25+
Tabbed8+2
26+
*/
27+
/*
28+
CR8+2
29+
*/
30+
}
31+
/*
32+
Spaced2: (prefixed so start of space aligns with comment)
33+
*/
34+
/*
35+
Tabbed2: (more indented b/c *start* of space will align with comment)
36+
*/
37+
/*
38+
Spaced6: (Alignment removed and realigning spaces inserted)
39+
*/
40+
/*
41+
Tabbed4+2: (Alignment removed and realigning spaces inserted)
42+
*/
43+
44+
/*
45+
VT4+2: (should align)
46+
*/
47+
/*
48+
FF4+2: (should align)
49+
*/
50+
/*
51+
CR4+2: (should align)
52+
*/
53+
/*
54+
// (NEL deliberately omitted)
55+
*/
56+
/*
57+
     Ogham Space Mark 4+2: (should align)
58+
*/
59+
/*
60+
᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align)
61+
*/
62+
/*
63+
     Four-per-em space 4+2: (should align)
64+
*/
65+
66+
/*
67+
᠎ Mongolian Vowel Sep count 1: (should align)
68+
᠎ Mongolian Vowel Sep count 2: (should align)
69+
᠎᠎ Mongolian Vowel Sep count 3: (should align)
70+
᠎ Mongolian Vowel Sep count 4: (should align)
71+
᠎ ᠎ Mongolian Vowel Sep count 5: (should align)
72+
᠎᠎ Mongolian Vowel Sep count 6: (should align)
73+
᠎᠎᠎ Mongolian Vowel Sep count 7: (should align)
74+
᠎ Mongolian Vowel Sep count 8: (should align)
75+
᠎ ᠎ Mongolian Vowel Sep count 9: (should align)
76+
᠎ ᠎ Mongolian Vowel Sep count A: (should align)
77+
᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align)
78+
᠎᠎ Mongolian Vowel Sep count C: (should align)
79+
᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align)
80+
᠎᠎᠎ Mongolian Vowel Sep count E: (should align)
81+
᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align)
82+
*/
83+
84+
85+
/* */ /*
86+
Hello from offset 6
87+
Space 6+2: compare A
88+
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B
89+
*/
90+
/*᠎*/ /*
91+
Hello from another offset 6 with wchars establishing column offset
92+
Space 6+2: compare C
93+
᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D
94+
*/
95+
}
96+
97+
fn main() {
98+
// Taken from http://en.wikipedia.org/wiki/Whitespace_character
99+
let chars =
100+
['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
101+
// '\x85', // for some reason Rust thinks NEL isn't whitespace
102+
'\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
103+
'\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
104+
'\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
105+
for chars.each |c| {
106+
let ws = c.is_whitespace();
107+
println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
108+
}
109+
}

0 commit comments

Comments
 (0)