Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce -Zsplit-metadata to split metadata out of rlibs/dylibs #137535

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ fn link_rlib<'a>(
let (metadata, metadata_position) = create_wrapper_file(
sess,
".rmeta".to_string(),
codegen_results.metadata.raw_data(),
codegen_results.metadata.stub_or_full(),
);
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
match metadata_position {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,8 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());
packed_metadata.write_all(&(metadata.stub_or_full().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.stub_or_full());

let Some(mut file) = create_object_file(sess) else {
if sess.target.is_like_wasm {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ fn test_unstable_options_tracking_hash() {
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
tracked!(small_data_threshold, Some(16));
tracked!(split_lto_unit, Some(true));
tracked!(split_metadata, true);
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
tracked!(stack_protector, StackProtector::All);
tracked!(teach, true);
Expand Down
26 changes: 20 additions & 6 deletions compiler/rustc_metadata/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{fs, io};

use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{OutFileName, OutputType};
use rustc_session::config::{CrateType, OutFileName, OutputType};
use rustc_session::output::filename_for_metadata;
use rustc_session::{MetadataKind, Session};
use tempfile::Builder as TempFileBuilder;
Expand Down Expand Up @@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
let metadata_stub_filename = if tcx.sess.opts.unstable_opts.split_metadata
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
{
Some(metadata_tmpdir.as_ref().join("stub.rmeta"))
} else {
None
};

// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
Expand All @@ -60,9 +67,15 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
if let Some(metadata_stub_filename) = &metadata_stub_filename {
std::fs::File::create(metadata_stub_filename).unwrap_or_else(|err| {
tcx.dcx()
.emit_fatal(FailedCreateFile { filename: &metadata_stub_filename, err });
});
}
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref())
}
};

Expand Down Expand Up @@ -100,9 +113,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {

// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
EncodedMetadata::from_path(metadata_filename, metadata_stub_filename, metadata_tmpdir)
.unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});

let need_metadata_module = metadata_kind == MetadataKind::Compressed;

Expand Down
59 changes: 33 additions & 26 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ pub(crate) struct CrateLocator<'a> {

// Immutable per-search configuration.
crate_name: Symbol,
// Dependency paths passed through --extern
exact_paths: Vec<CanonicalizedPath>,
pub hash: Option<Svh>,
extra_filename: Option<&'a str>,
Expand Down Expand Up @@ -511,6 +512,8 @@ impl<'a> CrateLocator<'a> {
rlib: self.extract_one(rlibs, CrateFlavor::Rlib, &mut slot)?,
dylib: self.extract_one(dylibs, CrateFlavor::Dylib, &mut slot)?,
};
// Question: check if we have no rmeta, but rlib/dylib with is_stub, and in that case
// invoke `find_library`?
Ok(slot.map(|(svh, metadata, _)| (svh, Library { source, metadata })))
}

Expand Down Expand Up @@ -728,37 +731,41 @@ impl<'a> CrateLocator<'a> {
let Some(file) = loc_orig.file_name().and_then(|s| s.to_str()) else {
return Err(CrateError::ExternLocationNotFile(self.crate_name, loc_orig.clone()));
};
// FnMut cannot return reference to captured value, so references
// must be taken outside the closure.
let rlibs = &mut rlibs;
let rmetas = &mut rmetas;
let dylibs = &mut dylibs;
let type_via_filename = (|| {
if file.starts_with("lib") {
if file.ends_with(".rlib") {
return Some(rlibs);
if file.starts_with("lib") {
if file.ends_with(".rlib") {
// In case the .rlib contains only stub metadata, we will most likely
// need to load a corresponding .rmeta file. Since it will often be
// located right next to the .rlib path, directly add it to the candidate
// paths as a "fast-path".
let possible_rmeta_path = loc_orig.with_extension("rmeta");
if let Ok(rmeta_path) = try_canonicalize(possible_rmeta_path) {
rmetas.insert(rmeta_path, PathKind::ExternFlag);
}
if file.ends_with(".rmeta") {
return Some(rmetas);
}
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
return Some(dylibs);
rlibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
None
})();
match type_via_filename {
Some(type_via_filename) => {
type_via_filename.insert(loc_canon.clone(), PathKind::ExternFlag);
if file.ends_with(".rmeta") {
rmetas.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
None => {
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if let Some(stem) =
file.strip_prefix(dll_prefix).and_then(|name| name.strip_suffix(dll_suffix))
{
// See the comment above about stub metadata; it also applies here to dylibs.
let possible_rmeta_path =
loc_orig.parent().unwrap().join(format!("lib{stem}.rmeta"));
if let Ok(rmeta_path) = try_canonicalize(possible_rmeta_path) {
rmetas.insert(rmeta_path, PathKind::ExternFlag);
}
dylibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
}

// Extract the dylib/rlib/rmeta triple.
Expand Down
91 changes: 72 additions & 19 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: proc_macro_data.is_some(),
is_stub: false,
},
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
Expand Down Expand Up @@ -2235,54 +2236,75 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
// generated regardless of trailing bytes that end up in it.

pub struct EncodedMetadata {
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
mmap: Option<Mmap>,
// The declaration order matters because `full_metadata` should be dropped
// before `_temp_dir`.
full_metadata: Option<Mmap>,
// This is an optional stub metadata containing only the crate header.
// The header should be very small, so we load it directly into memory.
stub_metadata: Option<Vec<u8>>,
// We need to carry MaybeTempDir to avoid deleting the temporary
// directory while accessing the Mmap.
_temp_dir: Option<MaybeTempDir>,
}

impl EncodedMetadata {
#[inline]
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
pub fn from_path(
path: PathBuf,
stub_path: Option<PathBuf>,
temp_dir: Option<MaybeTempDir>,
) -> std::io::Result<Self> {
let file = std::fs::File::open(&path)?;
let file_metadata = file.metadata()?;
if file_metadata.len() == 0 {
return Ok(Self { mmap: None, _temp_dir: None });
return Ok(Self { full_metadata: None, stub_metadata: None, _temp_dir: None });
}
let mmap = unsafe { Some(Mmap::map(file)?) };
Ok(Self { mmap, _temp_dir: temp_dir })
let full_mmap = unsafe { Some(Mmap::map(file)?) };

let stub =
if let Some(stub_path) = stub_path { Some(std::fs::read(stub_path)?) } else { None };

Ok(Self { full_metadata: full_mmap, stub_metadata: stub, _temp_dir: temp_dir })
}

#[inline]
pub fn full(&self) -> &[u8] {
&self.full_metadata.as_deref().unwrap_or_default()
}

#[inline]
pub fn raw_data(&self) -> &[u8] {
self.mmap.as_deref().unwrap_or_default()
pub fn stub_or_full(&self) -> &[u8] {
self.stub_metadata.as_deref().unwrap_or(self.full())
}
}

impl<S: Encoder> Encodable<S> for EncodedMetadata {
fn encode(&self, s: &mut S) {
let slice = self.raw_data();
self.stub_metadata.encode(s);

let slice = self.full();
slice.encode(s)
}
}

impl<D: Decoder> Decodable<D> for EncodedMetadata {
fn decode(d: &mut D) -> Self {
let stub = <Option<Vec<u8>>>::decode(d);

let len = d.read_usize();
let mmap = if len > 0 {
let full_metadata = if len > 0 {
let mut mmap = MmapMut::map_anon(len).unwrap();
mmap.copy_from_slice(d.read_raw_bytes(len));
Some(mmap.make_read_only().unwrap())
} else {
None
};

Self { mmap, _temp_dir: None }
Self { full_metadata, stub_metadata: stub, _temp_dir: None }
}
}

pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");

// Since encoding metadata is not in a query, and nothing is cached,
Expand All @@ -2296,6 +2318,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
}

with_encode_metadata_header(tcx, path, |ecx| {
// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();

// Flush buffer to ensure backing file has the correct size.
ecx.opaque.flush();
// Record metadata size for self-profiling
tcx.prof.artifact_size(
"crate_metadata",
"crate_metadata",
ecx.opaque.file().metadata().unwrap().len(),
);

root.position.get()
});

if let Some(ref_path) = ref_path {
with_encode_metadata_header(tcx, ref_path, |ecx| {
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
name: tcx.crate_name(LOCAL_CRATE),
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: false,
is_stub: true,
});
header.position.get()
});
}
}

fn with_encode_metadata_header(
tcx: TyCtxt<'_>,
path: &Path,
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
) {
let mut encoder = opaque::FileEncoder::new(path)
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
encoder.emit_raw_bytes(METADATA_HEADER);
Expand Down Expand Up @@ -2330,9 +2388,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
// Encode the rustc version string in a predictable location.
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);

// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
let root_position = f(&mut ecx);

// Make sure we report any errors from writing to the file.
// If we forget this, compilation can succeed with an incomplete rmeta file,
Expand All @@ -2342,12 +2398,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
}

let file = ecx.opaque.file();
if let Err(err) = encode_root_position(file, root.position.get()) {
if let Err(err) = encode_root_position(file, root_position) {
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
}

// Record metadata size for self-profiling
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
}

fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,12 @@ pub(crate) struct CrateHeader {
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
/// time ProcMacroData changes.
pub(crate) is_proc_macro_crate: bool,
/// Whether this crate metadata section is just a stub.
/// Stubs do not contain the full metadata (it will be typically stored
/// in a separate rmeta file).
///
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
pub(crate) is_stub: bool,
}

/// Serialized `.rmeta` data for a crate.
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2482,6 +2482,8 @@ written to standard error output)"),
by the linker"),
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable LTO unit splitting (default: no)"),
split_metadata: bool = (false, parse_bool, [TRACKED],
"split metadata out of libraries into .rmeta files"),
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]
Expand Down
3 changes: 3 additions & 0 deletions src/doc/unstable-book/src/compiler-flags/split_metadata.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## `split-metadata`

This option instructs `rustc` to include only a stub metadata section in `rlib` and `dylib` crate types instead of the full metadata, in order to reduce their size on disk. You will probably want to combine this option with `--emit=metadata` so that the full metadata is written to a separate `.rmeta` file.
1 change: 1 addition & 0 deletions tests/run-make/split-metadata/dep1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// Empty public function exported by the `dep1` test crate.
///
/// It does nothing itself; `foo.rs` in this test directory calls it as
/// `dep1::func_dep1()`.
pub fn func_dep1() {}
5 changes: 5 additions & 0 deletions tests/run-make/split-metadata/foo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Bring the `dep1` crate into scope.
// NOTE(review): presumably this is the crate built from `dep1.rs` in this
// test directory — confirm against the test's build script.
extern crate dep1;

// Entry point: calls `dep1::func_dep1()` so the dependency is actually used.
fn main() {
dep1::func_dep1();
}
Loading
Loading