Skip to content

Commit 0eb1b8f

Browse files
authored
Rollup merge of rust-lang#135395 - saethlin:compiler-builtins-cgus, r=bjorn3
Enforce the compiler-builtins partitioning scheme. compiler-builtins needs every intrinsic in its own CGU. Currently, the compiler-builtins crate puts every intrinsic in its own inline module, then `library/Cargo.toml` uses a profile override so that when we build the sysroot, compiler-builtins is built with more `codegen-units` than we have intrinsics, and partitioning never merges two intrinsics together. This approach does not work with `-Zbuild-std` because the profile override gets ignored. And it's kludgey anyway; our own standard library should not be fighting with our own compiler in an attempt to override its behavior. We should change the compiler's behavior to do the right thing in the first place. So that's what this PR does. There's some light refactoring of the CGU partitioning code, then in 3 places I've added a check for `is_compiler_builtins`: * There's a special case now in `cross_crate_inlinable`; every function in compiler-builtins that is not `#[no_mangle]` is made cross-crate-inlinable, which ensures we do not run into problems inlining helpers into intrinsics such as rust-lang#73135 * When building compiler-builtins, the name of the CGU that a MonoItem is given is just the MonoItem's symbol name. This puts every GloballyShared item in its own CGU. * Then when building compiler-builtins, we skip CGU merging. That should ensure that we have one object file per intrinsic, and if optimizations are enabled, there should be no extra CGUs full of helper functions (which is what currently happens in the precompiled standard library we distribute; my nightly libcompiler_builtins.rlib for x86_64-unknown-linux-gnu has 174 CGUs and with this PR we have 150).
2 parents 3c6e5de + e17debb commit 0eb1b8f

File tree

9 files changed

+166
-35
lines changed

9 files changed

+166
-35
lines changed

compiler/rustc_mir_transform/src/cross_crate_inline.rs

+9
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
3434
return true;
3535
}
3636

37+
// compiler-builtins only defines intrinsics (which are handled above by checking
38+
// contains_extern_indicator) and helper functions used by those intrinsics. The helper
39+
// functions should always be inlined into intrinsics that use them. This check does not
40+
// guarantee that we get the optimizations we want, but it makes them *much* easier.
41+
// See https://github.com/rust-lang/rust/issues/73135
42+
if tcx.is_compiler_builtins(rustc_span::def_id::LOCAL_CRATE) {
43+
return true;
44+
}
45+
3746
if tcx.has_attr(def_id, sym::rustc_intrinsic) {
3847
// Intrinsic fallback bodies are always cross-crate inlineable.
3948
// To ensure that the MIR inliner doesn't cluelessly try to inline fallback

compiler/rustc_monomorphize/src/partitioning.rs

+40-21
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ where
167167
// estimates.
168168
{
169169
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
170-
merge_codegen_units(cx, &mut codegen_units);
170+
let cgu_contents = merge_codegen_units(cx, &mut codegen_units);
171+
rename_codegen_units(cx, &mut codegen_units, cgu_contents);
171172
debug_dump(tcx, "MERGE", &codegen_units);
172173
}
173174

@@ -202,7 +203,6 @@ where
202203
I: Iterator<Item = MonoItem<'tcx>>,
203204
{
204205
let mut codegen_units = UnordMap::default();
205-
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
206206
let mut internalization_candidates = UnordSet::default();
207207

208208
// Determine if monomorphizations instantiated in this crate will be made
@@ -229,20 +229,8 @@ where
229229
}
230230
}
231231

232-
let characteristic_def_id = characteristic_def_id_of_mono_item(cx.tcx, mono_item);
233-
let is_volatile = is_incremental_build && mono_item.is_generic_fn();
234-
235-
let cgu_name = match characteristic_def_id {
236-
Some(def_id) => compute_codegen_unit_name(
237-
cx.tcx,
238-
cgu_name_builder,
239-
def_id,
240-
is_volatile,
241-
cgu_name_cache,
242-
),
243-
None => fallback_cgu_name(cgu_name_builder),
244-
};
245-
232+
let cgu_name =
233+
compute_codegen_unit_name(cx.tcx, cgu_name_builder, mono_item, cgu_name_cache);
246234
let cgu = codegen_units.entry(cgu_name).or_insert_with(|| CodegenUnit::new(cgu_name));
247235

248236
let mut can_be_internalized = true;
@@ -329,7 +317,7 @@ where
329317
fn merge_codegen_units<'tcx>(
330318
cx: &PartitioningCx<'_, 'tcx>,
331319
codegen_units: &mut Vec<CodegenUnit<'tcx>>,
332-
) {
320+
) -> UnordMap<Symbol, Vec<Symbol>> {
333321
assert!(cx.tcx.sess.codegen_units().as_usize() >= 1);
334322

335323
// A sorted order here ensures merging is deterministic.
@@ -339,6 +327,13 @@ fn merge_codegen_units<'tcx>(
339327
let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
340328
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
341329

330+
// When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU.
331+
// There may be mergeable CGUs under this constraint, but just skipping over merging is much
332+
// simpler.
333+
if cx.tcx.is_compiler_builtins(LOCAL_CRATE) {
334+
return cgu_contents;
335+
}
336+
342337
// If N is the maximum number of CGUs, and the CGUs are sorted from largest
343338
// to smallest, we repeatedly find which CGU in codegen_units[N..] has the
344339
// greatest overlap of inlined items with codegen_units[N-1], merge that
@@ -429,6 +424,14 @@ fn merge_codegen_units<'tcx>(
429424
// Don't update `cgu_contents`, that's only for incremental builds.
430425
}
431426

427+
cgu_contents
428+
}
429+
430+
fn rename_codegen_units<'tcx>(
431+
cx: &PartitioningCx<'_, 'tcx>,
432+
codegen_units: &mut Vec<CodegenUnit<'tcx>>,
433+
cgu_contents: UnordMap<Symbol, Vec<Symbol>>,
434+
) {
432435
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
433436

434437
// Rename the newly merged CGUs.
@@ -686,13 +689,26 @@ fn characteristic_def_id_of_mono_item<'tcx>(
686689
}
687690
}
688691

689-
fn compute_codegen_unit_name(
690-
tcx: TyCtxt<'_>,
692+
fn compute_codegen_unit_name<'tcx>(
693+
tcx: TyCtxt<'tcx>,
691694
name_builder: &mut CodegenUnitNameBuilder<'_>,
692-
def_id: DefId,
693-
volatile: bool,
695+
mono_item: MonoItem<'tcx>,
694696
cache: &mut CguNameCache,
695697
) -> Symbol {
698+
// When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU.
699+
// Using the symbol name as the CGU name puts every GloballyShared item in its own CGU, but in
700+
// an optimized build we actually want every item in the crate that isn't an intrinsic to get
701+
// LocalCopy so that it is easy to inline away. In an unoptimized build, this CGU naming
702+
// strategy probably generates more CGUs than we strictly need. But it is simple.
703+
if tcx.is_compiler_builtins(LOCAL_CRATE) {
704+
let name = mono_item.symbol_name(tcx);
705+
return Symbol::intern(name.name);
706+
}
707+
708+
let Some(def_id) = characteristic_def_id_of_mono_item(tcx, mono_item) else {
709+
return fallback_cgu_name(name_builder);
710+
};
711+
696712
// Find the innermost module that is not nested within a function.
697713
let mut current_def_id = def_id;
698714
let mut cgu_def_id = None;
@@ -720,6 +736,9 @@ fn compute_codegen_unit_name(
720736

721737
let cgu_def_id = cgu_def_id.unwrap();
722738

739+
let is_incremental_build = tcx.sess.opts.incremental.is_some();
740+
let volatile = is_incremental_build && mono_item.is_generic_fn();
741+
723742
*cache.entry((cgu_def_id, volatile)).or_insert_with(|| {
724743
let def_path = tcx.def_path(cgu_def_id);
725744

library/Cargo.toml

-13
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,6 @@ exclude = [
1212
"windows_targets"
1313
]
1414

15-
[profile.release.package.compiler_builtins]
16-
# For compiler-builtins we always use a high number of codegen units.
17-
# The goal here is to place every single intrinsic into its own object
18-
# file to avoid symbol clashes with the system libgcc if possible. Note
19-
# that this number doesn't actually produce this many object files, we
20-
# just don't create more than this number of object files.
21-
#
22-
# It's a bit of a bummer that we have to pass this here, unfortunately.
23-
# Ideally this would be specified through an env var to Cargo so Cargo
24-
# knows how many CGUs are for this specific crate, but for now
25-
# per-crate configuration isn't specifiable in the environment.
26-
codegen-units = 10000
27-
2815
# These dependencies of the standard library implement symbolication for
2916
# backtraces on most platforms. Their debuginfo causes both linking to be slower
3017
# (more data to chew through) and binaries to be larger without really all that

tests/run-make/compiler-builtins/rmake.rs → tests/run-make/compiler-builtins-linkage/rmake.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
//! The compiler_builtins library is special. It can call functions in core, but it must not
1+
//! The compiler_builtins library is special. When linkers are passed multiple libraries, they
2+
//! expect undefined symbols mentioned by libraries on the left to be defined in libraries to the
3+
//! right. Since calls to compiler_builtins may be inserted during codegen, it is placed all the way
4+
//! to the right. Therefore, compiler_builtins can call functions in core but it must not
25
//! require linkage against a build of core. If it ever does, building the standard library *may*
36
//! result in linker errors, depending on whether the linker in use applies optimizations first or
47
//! resolves symbols first. So the portable and safe approach is to forbid such a linkage
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[package]
2+
name = "scratch"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[lib]
7+
path = "lib.rs"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#![no_std]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
//! The compiler_builtins library is special. It exists to export a number of intrinsics which may
2+
//! also be provided by libgcc or compiler-rt, and when an intrinsic is provided by another
3+
//! library, we want that definition to override the one in compiler_builtins because we expect
4+
//! that those implementations are more optimized than compiler_builtins. To make sure that an
5+
//! attempt to override a compiler_builtins intrinsic does not result in a multiple definitions
6+
//! linker error, the compiler has special CGU partitioning logic for compiler_builtins that
7+
//! ensures every intrinsic gets its own CGU.
8+
//!
9+
//! This test is slightly overfit to the current compiler_builtins CGU naming strategy; it doesn't
10+
//! distinguish between "multiple intrinsics are in one object file!" which would be very bad, and
11+
//! "This object file has an intrinsic and also some of its helper functions!" which would be okay.
12+
//!
13+
//! This test ensures that the compiler_builtins rlib has only one intrinsic in each object file.
14+
15+
// wasm and nvptx targets don't produce rlib files that object can parse.
16+
//@ ignore-wasm
17+
//@ ignore-nvptx64
18+
19+
#![deny(warnings)]
20+
21+
use std::str;
22+
23+
use run_make_support::object::read::Object;
24+
use run_make_support::object::read::archive::ArchiveFile;
25+
use run_make_support::object::{ObjectSymbol, SymbolKind};
26+
use run_make_support::rfs::{read, read_dir};
27+
use run_make_support::{cargo, object, path, target};
28+
29+
/// Entry point of the run-make test: prints the target being tested, then calls `run_test`
/// once for every combination of codegen-unit count (1, 2) and profile ("debug", "release").
fn main() {
30+
println!("Testing compiler_builtins CGU partitioning for {}", target());
31+
32+
// CGU partitioning has some special cases for codegen-units=1, so we also test 2 CGUs.
33+
for cgus in [1, 2] {
34+
for profile in ["debug", "release"] {
35+
run_test(profile, cgus);
36+
}
37+
}
38+
}
39+
40+
/// Builds the crate described by the local `Cargo.toml` with `-Zbuild-std=core` for the current
/// target, then inspects the resulting `libcompiler_builtins*.rlib` and asserts that no object
/// file in it defines more than one global text symbol.
///
/// `profile` is either "debug" or "release"; it selects whether `--release` is passed and which
/// output directory is searched. `cgus` is forwarded to rustc as `-Ccodegen-units`.
///
/// Panics (via `unwrap`/`assert!`) if the rlib cannot be found or parsed, or if any archive
/// member defines more than one global text symbol.
fn run_test(profile: &str, cgus: usize) {
41+
println!("Testing with profile {profile} and -Ccodegen-units={cgus}");
42+
43+
let target_dir = path("target");
44+
45+
let mut cmd = cargo();
46+
cmd.args(&[
47+
"build",
48+
"--manifest-path",
49+
"Cargo.toml",
50+
"-Zbuild-std=core",
51+
"--target",
52+
&target(),
53+
])
54+
.env("RUSTFLAGS", &format!("-Ccodegen-units={cgus}"))
55+
.env("CARGO_TARGET_DIR", &target_dir)
56+
// NOTE(review): presumably required so the unstable `-Zbuild-std` flag is accepted — confirm.
.env("RUSTC_BOOTSTRAP", "1")
57+
// Visual Studio 2022 requires that the LIB env var be set so it can
58+
// find the Windows SDK.
59+
.env("LIB", std::env::var("LIB").unwrap_or_default());
60+
// Only the release profile adds a flag; the profile name is also the output directory
// component used when locating the rlib below.
if profile == "release" {
61+
cmd.arg("--release");
62+
}
63+
cmd.run();
64+
65+
let rlibs_path = target_dir.join(target()).join(profile).join("deps");
66+
// Take the first directory entry whose file name starts with `libcompiler_builtins` and ends
// with `.rlib`; the trailing `unwrap` panics if the build produced no such file.
let compiler_builtins_rlib = read_dir(rlibs_path)
67+
.find_map(|e| {
68+
let path = e.unwrap().path();
69+
let file_name = path.file_name().unwrap().to_str().unwrap();
70+
if file_name.starts_with("libcompiler_builtins") && file_name.ends_with(".rlib") {
71+
Some(path)
72+
} else {
73+
None
74+
}
75+
})
76+
.unwrap();
77+
78+
// rlib files are archives, where the archive members are our CGUs, and we also have one called
79+
// lib.rmeta which is the encoded metadata. Each of the CGUs is an object file.
80+
let data = read(compiler_builtins_rlib);
81+
82+
let archive = ArchiveFile::parse(&*data).unwrap();
83+
for member in archive.members() {
84+
let member = member.unwrap();
85+
// The metadata member is not an object file, so it is skipped rather than parsed.
if member.name() == b"lib.rmeta" {
86+
continue;
87+
}
88+
// Shadows the outer `data`: from here on `data` is this member's bytes only.
let data = member.data(&*data).unwrap();
89+
let object = object::File::parse(&*data).unwrap();
90+
91+
// Count of symbols in this object that are text, defined here, and global.
let mut global_text_symbols = 0;
92+
println!("Inspecting object {}", str::from_utf8(&member.name()).unwrap());
93+
for symbol in object
94+
.symbols()
95+
.filter(|symbol| matches!(symbol.kind(), SymbolKind::Text))
96+
.filter(|symbol| symbol.is_definition() && symbol.is_global())
97+
{
98+
println!("symbol: {:?}", symbol.name().unwrap());
99+
global_text_symbols += 1;
100+
}
101+
// Assert that this object/CGU does not define multiple global text symbols.
102+
// We permit the 0 case because some CGUs may only be assigned a static.
103+
assert!(global_text_symbols <= 1);
104+
}
105+
}

0 commit comments

Comments
 (0)