Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Define non-panicking UTF encoding methods on char #52580

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 87 additions & 20 deletions src/libcore/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,46 @@ impl char {
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
#[inline]
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
    // Read the buffer length before `dst` is passed on to
    // `try_encode_utf8`; it is only needed for the panic message.
    let capacity = dst.len();
    if let Some(encoded) = self.try_encode_utf8(dst) {
        return encoded;
    }
    // The fallible encoder returned `None`: report the shortfall.
    panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
           self.len_utf8(), self as u32, capacity)
}

/// Encodes this character as UTF-8 into the provided byte buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
/// Returns `None` if the buffer is too short to hold the encoded character.
///
/// # Examples
///
/// In both of these examples, 'ß' takes two bytes to encode.
///
/// ```
/// #![feature(try_unicode_encode_char)]
///
/// let mut b = [0; 2];
///
/// let result = 'ß'.try_encode_utf8(&mut b).unwrap();
///
/// assert_eq!(result, "ß");
///
/// assert_eq!(result.len(), 2);
/// ```
///
/// A buffer that's too small:
///
/// ```
/// #![feature(try_unicode_encode_char)]
///
/// let mut b = [0; 1];
///
/// assert_eq!(None, 'ß'.try_encode_utf8(&mut b));
/// ```
#[unstable(feature = "try_unicode_encode_char", issue = "52579")]
#[inline]
pub fn try_encode_utf8(self, dst: &mut [u8]) -> Option<&mut str> {
let code = self as u32;
unsafe {
let len =
Expand All @@ -458,12 +498,9 @@ impl char {
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
4
} else {
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf8(),
code,
dst.len())
return None;
};
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
Some(from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)))
}
}

Expand All @@ -484,43 +521,73 @@ impl char {
///
/// let result = '𝕊'.encode_utf16(&mut b);
///
/// assert_eq!(result, [0xD835, 0xDD4A]);
///
/// assert_eq!(result.len(), 2);
/// ```
///
/// A buffer that's too small:
///
/// ```
/// use std::thread;
///
/// let result = thread::spawn(|| {
///     let mut b = [0; 1];
///
///     // this panics
///     '𝕊'.encode_utf16(&mut b);
/// }).join();
///
/// assert!(result.is_err());
/// ```
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
#[inline]
pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
    // Read the buffer length before `dst` is passed on to
    // `try_encode_utf16`; it is only needed for the panic message.
    let capacity = dst.len();
    if let Some(encoded) = self.try_encode_utf16(dst) {
        return encoded;
    }
    // The fallible encoder returned `None`: report the shortfall.
    panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
           self.len_utf16(), self as u32, capacity)
}

/// Encodes this character as UTF-16 into the provided `u16` buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Returns `None`, instead of panicking, if the buffer is too short to hold
/// the encoded character. A buffer of length 2 is large enough to encode
/// any `char`.
///
/// # Examples
///
/// In both of these examples, '𝕊' takes two `u16`s to encode.
///
/// ```
/// #![feature(try_unicode_encode_char)]
///
/// let mut b = [0; 2];
///
/// let result = '𝕊'.try_encode_utf16(&mut b).unwrap();
///
/// assert_eq!(result.len(), 2);
/// ```
///
/// A buffer that's too small:
///
/// ```
/// #![feature(try_unicode_encode_char)]
///
/// let mut b = [0; 1];
///
/// assert_eq!(None, '𝕊'.try_encode_utf16(&mut b));
/// ```
#[unstable(feature = "try_unicode_encode_char", issue = "52579")]
#[inline]
pub fn try_encode_utf16(self, dst: &mut [u16]) -> Option<&mut [u16]> {
    let mut code = self as u32;
    // SAFETY: every unchecked write and every raw-parts length below is
    // guarded by the length test of its own branch (`!dst.is_empty()` for
    // one unit, `dst.len() >= 2` for two units).
    unsafe {
        if (code & 0xFFFF) == code && !dst.is_empty() {
            // The BMP falls through (assuming non-surrogate, as it should)
            *dst.get_unchecked_mut(0) = code as u16;
            Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 1))
        } else if dst.len() >= 2 {
            // Supplementary planes break into surrogates.
            code -= 0x1_0000;
            *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
            *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
            Some(slice::from_raw_parts_mut(dst.as_mut_ptr(), 2))
        } else {
            // Not enough room: signal failure to the caller rather than panic.
            None
        }
    }
}
Expand Down
1 change: 1 addition & 0 deletions src/libcore/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
#![feature(const_slice_len)]
#![feature(const_str_as_bytes)]
#![feature(const_str_len)]
#![feature(try_unicode_encode_char)]

#[prelude_import]
#[allow(unused)]
Expand Down