Skip to content

Commit 4a3e169

Browse files
committed
Make char::is_lowercase and char::is_uppercase const
Implements rust-lang#101400.
1 parent b11bf65 commit 4a3e169

File tree

5 files changed

+55
-27
lines changed

5 files changed

+55
-27
lines changed

library/core/src/char/methods.rs

+20-2
Original file line number | Diff line number | Diff line change
@@ -746,10 +746,19 @@ impl char {
746746
/// assert!(!'中'.is_lowercase());
747747
/// assert!(!' '.is_lowercase());
748748
/// ```
749+
///
750+
/// In a const context:
751+
///
752+
/// ```
753+
/// #![feature(const_unicode_case_lookup)]
754+
/// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
755+
/// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
756+
/// ```
749757
#[must_use]
750758
#[stable(feature = "rust1", since = "1.0.0")]
759+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
751760
#[inline]
752-
pub fn is_lowercase(self) -> bool {
761+
pub const fn is_lowercase(self) -> bool {
753762
match self {
754763
'a'..='z' => true,
755764
c => c > '\x7f' && unicode::Lowercase(c),
@@ -779,10 +788,19 @@ impl char {
779788
/// assert!(!'中'.is_uppercase());
780789
/// assert!(!' '.is_uppercase());
781790
/// ```
791+
///
792+
/// In a const context:
793+
///
794+
/// ```
795+
/// #![feature(const_unicode_case_lookup)]
796+
/// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
797+
/// assert!(CAPITAL_DELTA_IS_UPPERCASE);
798+
/// ```
782799
#[must_use]
783800
#[stable(feature = "rust1", since = "1.0.0")]
801+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
784802
#[inline]
785-
pub fn is_uppercase(self) -> bool {
803+
pub const fn is_uppercase(self) -> bool {
786804
match self {
787805
'A'..='Z' => true,
788806
c => c > '\x7f' && unicode::Uppercase(c),

library/core/src/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,7 @@
143143
#![feature(const_type_id)]
144144
#![feature(const_type_name)]
145145
#![feature(const_default_impls)]
146+
#![feature(const_unicode_case_lookup)]
146147
#![feature(const_unsafecell_get_mut)]
147148
#![feature(core_panic)]
148149
#![feature(duration_consts_float)]

library/core/src/unicode/unicode_data.rs

+18-15
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
///! This file is generated by src/tools/unicode-table-generator; do not edit manually!
22
3+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
34
#[inline(always)]
4-
fn bitset_search<
5+
const fn bitset_search<
56
const N: usize,
67
const CHUNK_SIZE: usize,
78
const N1: usize,
@@ -17,14 +18,14 @@ fn bitset_search<
1718
let bucket_idx = (needle / 64) as usize;
1819
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
1920
let chunk_piece = bucket_idx % CHUNK_SIZE;
20-
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
21-
v
21+
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
22+
chunk_idx_map[chunk_map_idx]
2223
} else {
2324
return false;
2425
};
2526
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
26-
let word = if let Some(word) = bitset_canonical.get(idx) {
27-
*word
27+
let word = if idx < bitset_canonical.len() {
28+
bitset_canonical[idx]
2829
} else {
2930
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
3031
let mut word = bitset_canonical[real_idx as usize];
@@ -318,14 +319,14 @@ pub mod grapheme_extend {
318319

319320
#[rustfmt::skip]
320321
pub mod lowercase {
321-
static BITSET_CHUNKS_MAP: [u8; 123] = [
322+
const BITSET_CHUNKS_MAP: [u8; 123] = [
322323
14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323324
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324325
0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
325326
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0,
326327
3, 0, 0, 7,
327328
];
328-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
329+
const BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
329330
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
330331
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0],
331332
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 55, 0],
@@ -346,7 +347,7 @@ pub mod lowercase {
346347
[16, 49, 2, 20, 66, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0],
347348
[63, 39, 54, 12, 73, 61, 18, 1, 6, 62, 71, 19, 68, 69, 3, 44],
348349
];
349-
static BITSET_CANONICAL: [u64; 55] = [
350+
const BITSET_CANONICAL: [u64; 55] = [
350351
0b0000000000000000000000000000000000000000000000000000000000000000,
351352
0b1111111111111111110000000000000000000000000011111111111111111111,
352353
0b1010101010101010101010101010101010101010101010101010100000000010,
@@ -403,13 +404,14 @@ pub mod lowercase {
403404
0b1110011111111111111111111111111111111111111111110000000000000000,
404405
0b1110101111000000000000000000000000001111111111111111111111111100,
405406
];
406-
static BITSET_MAPPING: [(u8, u8); 20] = [
407+
const BITSET_MAPPING: [(u8, u8); 20] = [
407408
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
408409
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
409410
(6, 132), (7, 93),
410411
];
411412

412-
pub fn lookup(c: char) -> bool {
413+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
414+
pub const fn lookup(c: char) -> bool {
413415
super::bitset_search(
414416
c as u32,
415417
&BITSET_CHUNKS_MAP,
@@ -454,14 +456,14 @@ pub mod n {
454456

455457
#[rustfmt::skip]
456458
pub mod uppercase {
457-
static BITSET_CHUNKS_MAP: [u8; 125] = [
459+
const BITSET_CHUNKS_MAP: [u8; 125] = [
458460
12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
459461
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
460462
6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
461463
6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6,
462464
6, 6, 9, 6, 3,
463465
];
464-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
466+
const BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
465467
[43, 43, 5, 34, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 5, 1],
466468
[43, 43, 5, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
467469
[43, 43, 39, 43, 43, 43, 43, 43, 17, 17, 62, 17, 42, 29, 24, 23],
@@ -480,7 +482,7 @@ pub mod uppercase {
480482
[57, 19, 2, 18, 10, 47, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
481483
[57, 37, 17, 27, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
482484
];
483-
static BITSET_CANONICAL: [u64; 43] = [
485+
const BITSET_CANONICAL: [u64; 43] = [
484486
0b0000011111111111111111111111111000000000000000000000000000000000,
485487
0b0000000000111111111111111111111111111111111111111111111111111111,
486488
0b0101010101010101010101010101010101010101010101010101010000000001,
@@ -525,13 +527,14 @@ pub mod uppercase {
525527
0b1111011111111111000000000000000000000000000000000000000000000000,
526528
0b1111111100000000111111110000000000111111000000001111111100000000,
527529
];
528-
static BITSET_MAPPING: [(u8, u8); 25] = [
530+
const BITSET_MAPPING: [(u8, u8); 25] = [
529531
(0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121),
530532
(0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164),
531533
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
532534
];
533535

534-
pub fn lookup(c: char) -> bool {
536+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
537+
pub const fn lookup(c: char) -> bool {
535538
super::bitset_search(
536539
c as u32,
537540
&BITSET_CHUNKS_MAP,

src/tools/unicode-table-generator/src/range_search.rs

+6-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
1+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
12
#[inline(always)]
2-
fn bitset_search<
3+
const fn bitset_search<
34
const N: usize,
45
const CHUNK_SIZE: usize,
56
const N1: usize,
@@ -15,14 +16,14 @@ fn bitset_search<
1516
let bucket_idx = (needle / 64) as usize;
1617
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
1718
let chunk_piece = bucket_idx % CHUNK_SIZE;
18-
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
19-
v
19+
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
20+
chunk_idx_map[chunk_map_idx]
2021
} else {
2122
return false;
2223
};
2324
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
24-
let word = if let Some(word) = bitset_canonical.get(idx) {
25-
*word
25+
let word = if idx < bitset_canonical.len() {
26+
bitset_canonical[idx]
2627
} else {
2728
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
2829
let mut word = bitset_canonical[real_idx as usize];

src/tools/unicode-table-generator/src/raw_emitter.rs

+10-5
Original file line number | Diff line number | Diff line change
@@ -76,15 +76,15 @@ impl RawEmitter {
7676

7777
writeln!(
7878
&mut self.file,
79-
"static BITSET_CANONICAL: [u64; {}] = [{}];",
79+
"const BITSET_CANONICAL: [u64; {}] = [{}];",
8080
canonicalized.canonical_words.len(),
8181
fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
8282
)
8383
.unwrap();
8484
self.bytes_used += 8 * canonicalized.canonical_words.len();
8585
writeln!(
8686
&mut self.file,
87-
"static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
87+
"const BITSET_MAPPING: [(u8, u8); {}] = [{}];",
8888
canonicalized.canonicalized_words.len(),
8989
fmt_list(&canonicalized.canonicalized_words),
9090
)
@@ -96,7 +96,12 @@ impl RawEmitter {
9696

9797
self.blank_line();
9898

99-
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
99+
writeln!(
100+
&mut self.file,
101+
r#"#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]"#
102+
)
103+
.unwrap();
104+
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
100105
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
101106
writeln!(&mut self.file, " c as u32,").unwrap();
102107
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
@@ -130,15 +135,15 @@ impl RawEmitter {
130135

131136
writeln!(
132137
&mut self.file,
133-
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
138+
"const BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
134139
chunk_indices.len(),
135140
fmt_list(&chunk_indices),
136141
)
137142
.unwrap();
138143
self.bytes_used += chunk_indices.len();
139144
writeln!(
140145
&mut self.file,
141-
"static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
146+
"const BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
142147
chunk_length,
143148
chunks.len(),
144149
fmt_list(chunks.iter()),

0 commit comments

Comments
 (0)