From 2c78050f1d711f647bda4ab41df5a998a5ccfb8f Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Mon, 30 Nov 2015 17:29:32 +0100 Subject: [PATCH 1/2] pod --- text/0000-pod.md | 195 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 text/0000-pod.md diff --git a/text/0000-pod.md b/text/0000-pod.md new file mode 100644 index 00000000000..1735a6aa43c --- /dev/null +++ b/text/0000-pod.md @@ -0,0 +1,195 @@ +- Feature Name: pod +- Start Date: 2015-11-30 +- RFC PR: +- Rust Issue: + +# Summary +[summary]: #summary + +Add a `pod` language item and marker trait. The trait can only be implemented by +a subset of all types and identifies objects that are valid when they contain +arbitrary bit patterns. + +# Motivation +[motivation]: #motivation + +Several not-uncommon FFI operations cannot be expressed in safe code even though +they are completely safe. This causes users to reinvent the necessary functions +for said operations themselves, often incorrectly. For example, the following +piece of code attempts to write a sequence of `u32` with the `Write` trait: + +```rust +let to_write = unsafe { mem::transmute::<&[u32], &[u8]>(&self.data) }; +self.file.write(to_write); +``` + +This code is incorrect because the `to_write` slice has the same number of +elements as the original slice instead of four times as many. + +Consider the following similar case: + +```rust +pub struct linger { + pub l_onoff: c_int, + pub l_linger: c_int, +} + +impl Socket { + pub fn linger(&self) -> Result<linger> { + let mut linger = mem::zeroed(); + try!(getsockopt(self.fd, SO_LINGER, mem::as_mut_bytes(&mut linger))); + Ok(linger) + } +} +``` + +This function uses the generic `getsockopt` system call to retrieve the *linger* +setting of a socket: + +```rust +fn getsockopt(sockfd: c_int, optname: c_int, optval: &mut [u8]) -> Result; +``` + +The `linger` function cannot be written today because there is no +`mem::as_mut_bytes` function. 
Instead, the user would most likely create a slice +ad hoc with unsafe functions and manual size calculations. + +Consider the case of C unions: + +Various proposals have been made for how to express C unions in Rust code. The most +recent proposal suggested restricting the types in such unions to `Copy` in +order to avoid various sources of unsafety. However, such unions would still +have to be `unsafe` since, for example, `&u8` is `Copy` but cannot contain +arbitrary data. + +Note that C unions only ever contain plain old data (as defined below) since all +possible C types are plain old data. Hence, if the Rust equivalent of C unions +were restricted to plain old data, using it would be completely safe.† + +(† Some questions regarding the existence of `undef` fields in large variants +remain.) + +Lastly, a plain old data type allows for the following simple implementation of +a random value generator: + +```rust +pub trait RandomValueGenerator { + fn generate_random_bytes(&mut self, bytes: &mut [u8]); + + fn generate<T: Pod>(&mut self) -> T { + let mut t = mem::zeroed(); + self.generate_random_bytes(mem::as_mut_bytes(&mut t)); + t + } +} +``` + +## Various convenience functions using plain old data + +This section contains a list of useful functions that can be written with a +plain old data trait. + +```rust +/// Creates an object that has all bytes set to zero. +fn zeroed<T: Pod>() -> T; +``` + +```rust +/// Returns the mutable in-memory representation of an object. +fn as_mut_bytes<T: Pod>(val: &mut T) -> &mut [u8]; +``` + +```rust +/// Turns a slice into a reference to a Pod type if it's suitable. +/// +/// = Remarks +/// +/// The buffer is suitable if it is large enough to hold the type and properly +/// aligned. +pub fn from_bytes<T: Pod>(buf: &[u8]) -> Option<&T>; + +pub fn from_mut_bytes<T: Pod>(buf: &mut [u8]) -> Option<&mut T>; +``` + +```rust +impl<T> [T] { + /// Returns a mutable byte slice covering the same range as the slice. 
+ pub fn as_mut_bytes(&mut self) -> &mut [u8] where T: Pod; +} +``` + +# Detailed design +[design]: #detailed-design + +Add a `pod` lang item and safe `Pod` trait to mark types which are valid when +they contain arbitrary bit patterns. + +The following types are **Pod candidates**: + +* `u8`, `u16`, `u32`, `u64`, `usize`, +* `i8`, `i16`, `i32`, `i64`, `isize`, +* `f32`, `f64`, +* raw pointers, +* arrays of `Pod` types, +* tuples of `Pod` types, and +* structs where all fields are public and `Pod`. + +Only types that are Pod candidates can implement the `Pod` trait. Arrays +of `Pod` types and tuples of `Pod` types automatically implement the `Pod` +trait. + +Structs with `Drop` implementations are not Pod candidates since the drop flag +is considered a private field. + +## Justification of the set of Pod candidates + +It is easy to see that the list above contains only plain old data types by +applying the following criterion recursively: + +>If an object containing an arbitrary bit pattern can be constructed in safe +>code today, then the type of the object is a plain old data type.† + +(† This excludes padding between struct fields.) + +It is also clear that references (including slices) and enums are not plain old +data types. The case of structs with `Drop` implementations or hidden fields is +somewhat harder. First of all, consider the following type: + +```rust +struct Slice { + ptr: *const T, + len: usize, +} +``` + +The inherent methods of this type will likely make use of the fact that the user +cannot modify the `len` field in order to guarantee safety. Since the compiler +cannot know if the private fields must satisfy certain invariants, an +implementation of the safe (!) `Pod` trait cannot be allowed for such types. + +Second of all, note that `Pod` implies `Copy` with the set of safe functions +suggested above: + +```rust +let mut a: T = /* ... 
*/ +let mut b: T = mem::zeroed(); +memcpy(mem::as_mut_bytes(&mut b), mem::as_mut_bytes(&mut a)); +``` + +Since `Copy` types cannot be `Drop`, the same restriction should be applied to +`Pod`, even if the drop flag is not stored directly in the object. + +# Drawbacks +[drawbacks]: #drawbacks + +None known. + +# Alternatives +[alternatives]: #alternatives + +`Pod` could be unsafe to allow structs with private fields to be `Pod`. + +# Unresolved questions +[unresolved]: #unresolved-questions + +How LLVM treats uninitialized bytes in struct padding and unions. From 6d105e3c804e595110777897fa5c534c6e7a26b3 Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Tue, 8 Dec 2015 07:15:24 +0100 Subject: [PATCH 2/2] update --- text/0000-pod.md | 48 +++++------------------------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/text/0000-pod.md b/text/0000-pod.md index 1735a6aa43c..ea76d1124ea 100644 --- a/text/0000-pod.md +++ b/text/0000-pod.md @@ -132,52 +132,14 @@ The following types are **Pod candidates**: * raw pointers, * arrays of `Pod` types, * tuples of `Pod` types, and -* structs where all fields are public and `Pod`. +* structs where all fields are `Pod`. Only types that are Pod candidates can implement the `Pod` trait. Arrays of `Pod` types and tuples of `Pod` types automatically implement the `Pod` trait. -Structs with `Drop` implementations are not Pod candidates since the drop flag -is considered a private field. - -## Justification of the set of Pod candidates - -It is easy to see that the list above contains only plain old data types by -applying the following criterion recursively: - ->If an object containing an arbitrary bit pattern can be constructed in safe ->code today, then the type of the object is a plain old data type.† - -(† This excludes padding between struct fields.) - -It is also clear that references (including slices) and enums are not plain old -data types. 
The case of structs with `Drop` implementations or hidden fields is -somewhat harder. First of all, consider the following type: - -```rust -struct Slice { - ptr: *const T, - len: usize, -} -``` - -The inherent methods of this type will likely make use of the fact that the user -cannot modify the `len` field in order to guarantee safety. Since the compiler -cannot know if the private fields must satisfy certain invariants, an -implementation of the safe (!) `Pod` trait cannot be allowed for such types. - -Second of all, note that `Pod` implies `Copy` with the set of safe functions -suggested above: - -```rust -let mut a: T = /* ... */ -let mut b: T = mem::zeroed(); -memcpy(mem::as_mut_bytes(&mut b), mem::as_mut_bytes(&mut a)); -``` - -Since `Copy` types cannot be `Drop`, the same restriction should be applied to -`Pod`, even if the drop flag is not stored directly in the object. +The compiler ensures that padding bytes are initialized with unspecified values +whenever a `pod` object is initialized. # Drawbacks [drawbacks]: #drawbacks @@ -187,9 +149,9 @@ None known. # Alternatives [alternatives]: #alternatives -`Pod` could be unsafe to allow structs with private fields to be `Pod`. +None at the moment. # Unresolved questions [unresolved]: #unresolved-questions -How LLVM treats uninitialized bytes in struct padding and unions. +None at the moment.