Skip to content

Commit 219c5d3

Browse files
authored
Rollup merge of rust-lang#92142 - wesleywiser:fix_codecoverage_partitioning, r=tmandry
[code coverage] Fix missing dead code in modules that are never called The issue here is that the logic used to determine which CGU to put the dead function stubs in doesn't handle cases where a module is never assigned to a CGU (which is what happens when all of the code in the module is dead). The partitioning logic also caused issues in rust-lang#85461 where inline functions were duplicated into multiple CGUs resulting in duplicate symbols. This commit fixes the issue by removing the complex logic used to assign dead code stubs to CGUs and replaces it with a much simpler model: we pick one CGU to hold all the dead code stubs. We pick a CGU which has exported items which increases the likelihood the linker won't throw away our dead functions and we pick the smallest to minimize the impact on compilation times for crates with very large CGUs. Fixes rust-lang#91661 Fixes rust-lang#86177 Fixes rust-lang#85718 Fixes rust-lang#79622 r? `@tmandry` cc `@richkadel` This PR is not urgent so please don't let it interrupt your holidays! 🎄 🎁
2 parents ccfcf42 + e9cac4c commit 219c5d3

File tree

9 files changed

+103
-118
lines changed

9 files changed

+103
-118
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+34-86
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ use crate::llvm;
55
use llvm::coverageinfo::CounterMappingRegion;
66
use rustc_codegen_ssa::coverageinfo::map::{Counter, CounterExpression};
77
use rustc_codegen_ssa::traits::{ConstMethods, CoverageInfoMethods};
8-
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
9-
use rustc_hir::def_id::{DefId, DefIdSet};
8+
use rustc_data_structures::fx::FxIndexSet;
9+
use rustc_hir::def::DefKind;
10+
use rustc_hir::def_id::DefIdSet;
1011
use rustc_llvm::RustString;
12+
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
1113
use rustc_middle::mir::coverage::CodeRegion;
1214
use rustc_middle::ty::TyCtxt;
13-
use rustc_span::Symbol;
1415

1516
use std::ffi::CString;
1617

@@ -46,7 +47,7 @@ pub fn finalize<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) {
4647
// functions exist. Generate synthetic functions with a (required) single counter, and add the
4748
// MIR `Coverage` code regions to the `function_coverage_map`, before calling
4849
// `ctx.take_function_coverage_map()`.
49-
if !tcx.sess.instrument_coverage_except_unused_functions() {
50+
if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
5051
add_unused_functions(cx);
5152
}
5253

@@ -271,26 +272,35 @@ fn save_function_record(
271272
/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`tcx` query
272273
/// `codegened_and_inlined_items`).
273274
///
274-
/// *HOWEVER* the codegenned `DefId`s are partitioned across multiple `CodegenUnit`s (CGUs), and
275-
/// this function is processing a `function_coverage_map` for the functions (`Instance`/`DefId`)
276-
/// allocated to only one of those CGUs. We must NOT inject any unused functions's `CodeRegion`s
277-
/// more than once, so we have to pick a CGUs `function_coverage_map` into which the unused
278-
/// function will be inserted.
275+
/// These unused functions are then codegen'd in one of the CGUs which is marked as the
276+
/// "code coverage dead code cgu" during the partitioning process. This prevents us from generating
277+
/// code regions for the same function more than once which can lead to linker errors regarding
278+
/// duplicate symbols.
279279
fn add_unused_functions<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) {
280-
let tcx = cx.tcx;
280+
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
281281

282-
// FIXME(#79622): Can this solution be simplified and/or improved? Are there other sources
283-
// of compiler state data that might help (or better sources that could be exposed, but
284-
// aren't yet)?
282+
let tcx = cx.tcx;
285283

286284
let ignore_unused_generics = tcx.sess.instrument_coverage_except_unused_generics();
287285

288-
let all_def_ids: DefIdSet = tcx
286+
let eligible_def_ids: DefIdSet = tcx
289287
.mir_keys(())
290288
.iter()
291289
.filter_map(|local_def_id| {
292290
let def_id = local_def_id.to_def_id();
293-
if ignore_unused_generics && tcx.generics_of(def_id).requires_monomorphization(tcx) {
291+
let kind = tcx.def_kind(def_id);
292+
// `mir_keys` will give us `DefId`s for all kinds of things, not
293+
// just "functions", like consts, statics, etc. Filter those out.
294+
// If `ignore_unused_generics` was specified, filter out any
295+
// generic functions from consideration as well.
296+
if !matches!(
297+
kind,
298+
DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Generator
299+
) {
300+
return None;
301+
} else if ignore_unused_generics
302+
&& tcx.generics_of(def_id).requires_monomorphization(tcx)
303+
{
294304
return None;
295305
}
296306
Some(local_def_id.to_def_id())
@@ -299,79 +309,17 @@ fn add_unused_functions<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) {
299309

300310
let codegenned_def_ids = tcx.codegened_and_inlined_items(());
301311

302-
let mut unused_def_ids_by_file: FxHashMap<Symbol, Vec<DefId>> = FxHashMap::default();
303-
for &non_codegenned_def_id in all_def_ids.difference(codegenned_def_ids) {
304-
// Make sure the non-codegenned (unused) function has at least one MIR
305-
// `Coverage` statement with a code region, and return its file name.
306-
if let Some(non_codegenned_file_name) = tcx.covered_file_name(non_codegenned_def_id) {
307-
let def_ids =
308-
unused_def_ids_by_file.entry(*non_codegenned_file_name).or_insert_with(Vec::new);
309-
def_ids.push(non_codegenned_def_id);
310-
}
311-
}
312+
for &non_codegenned_def_id in eligible_def_ids.difference(codegenned_def_ids) {
313+
let codegen_fn_attrs = tcx.codegen_fn_attrs(non_codegenned_def_id);
312314

313-
if unused_def_ids_by_file.is_empty() {
314-
// There are no unused functions with file names to add (in any CGU)
315-
return;
316-
}
317-
318-
// Each `CodegenUnit` (CGU) has its own function_coverage_map, and generates a specific binary
319-
// with its own coverage map.
320-
//
321-
// Each covered function `Instance` can be included in only one coverage map, produced from a
322-
// specific function_coverage_map, from a specific CGU.
323-
//
324-
// Since unused functions did not generate code, they are not associated with any CGU yet.
325-
//
326-
// To avoid injecting the unused functions in multiple coverage maps (for multiple CGUs)
327-
// determine which function_coverage_map has the responsibility for publishing unreachable
328-
// coverage, based on file name: For each unused function, find the CGU that generates the
329-
// first function (based on sorted `DefId`) from the same file.
330-
//
331-
// Add a new `FunctionCoverage` to the `function_coverage_map`, with unreachable code regions
332-
// for each region in it's MIR.
333-
334-
// Convert the `HashSet` of `codegenned_def_ids` to a sortable vector, and sort them.
335-
let mut sorted_codegenned_def_ids: Vec<DefId> = codegenned_def_ids.iter().copied().collect();
336-
sorted_codegenned_def_ids.sort_unstable();
337-
338-
let mut first_covered_def_id_by_file: FxHashMap<Symbol, DefId> = FxHashMap::default();
339-
for &def_id in sorted_codegenned_def_ids.iter() {
340-
if let Some(covered_file_name) = tcx.covered_file_name(def_id) {
341-
// Only add files known to have unused functions
342-
if unused_def_ids_by_file.contains_key(covered_file_name) {
343-
first_covered_def_id_by_file.entry(*covered_file_name).or_insert(def_id);
344-
}
315+
// If a function is marked `#[no_coverage]`, then skip generating a
316+
// dead code stub for it.
317+
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_COVERAGE) {
318+
debug!("skipping unused fn marked #[no_coverage]: {:?}", non_codegenned_def_id);
319+
continue;
345320
}
346-
}
347-
348-
// Get the set of def_ids with coverage regions, known by *this* CoverageContext.
349-
let cgu_covered_def_ids: DefIdSet = match cx.coverage_context() {
350-
Some(ctx) => ctx
351-
.function_coverage_map
352-
.borrow()
353-
.keys()
354-
.map(|&instance| instance.def.def_id())
355-
.collect(),
356-
None => return,
357-
};
358321

359-
let cgu_covered_files: FxHashSet<Symbol> = first_covered_def_id_by_file
360-
.iter()
361-
.filter_map(
362-
|(&file_name, def_id)| {
363-
if cgu_covered_def_ids.contains(def_id) { Some(file_name) } else { None }
364-
},
365-
)
366-
.collect();
367-
368-
// For each file for which this CGU is responsible for adding unused function coverage,
369-
// get the `def_id`s for each unused function (if any), define a synthetic function with a
370-
// single LLVM coverage counter, and add the function's coverage `CodeRegion`s. to the
371-
// function_coverage_map.
372-
for covered_file_name in cgu_covered_files {
373-
for def_id in unused_def_ids_by_file.remove(&covered_file_name).into_iter().flatten() {
374-
cx.define_unused_fn(def_id);
375-
}
322+
debug!("generating unused fn: {:?}", non_codegenned_def_id);
323+
cx.define_unused_fn(non_codegenned_def_id);
376324
}
377325
}

compiler/rustc_middle/src/mir/mono.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,9 @@ pub struct CodegenUnit<'tcx> {
247247
items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
248248
size_estimate: Option<usize>,
249249
primary: bool,
250+
/// True if this is CGU is used to hold code coverage information for dead code,
251+
/// false otherwise.
252+
is_code_coverage_dead_code_cgu: bool,
250253
}
251254

252255
/// Specifies the linkage type for a `MonoItem`.
@@ -277,7 +280,13 @@ pub enum Visibility {
277280
impl<'tcx> CodegenUnit<'tcx> {
278281
#[inline]
279282
pub fn new(name: Symbol) -> CodegenUnit<'tcx> {
280-
CodegenUnit { name, items: Default::default(), size_estimate: None, primary: false }
283+
CodegenUnit {
284+
name,
285+
items: Default::default(),
286+
size_estimate: None,
287+
primary: false,
288+
is_code_coverage_dead_code_cgu: false,
289+
}
281290
}
282291

283292
pub fn name(&self) -> Symbol {
@@ -304,6 +313,15 @@ impl<'tcx> CodegenUnit<'tcx> {
304313
&mut self.items
305314
}
306315

316+
pub fn is_code_coverage_dead_code_cgu(&self) -> bool {
317+
self.is_code_coverage_dead_code_cgu
318+
}
319+
320+
/// Marks this CGU as the one used to contain code coverage information for dead code.
321+
pub fn make_code_coverage_dead_code_cgu(&mut self) {
322+
self.is_code_coverage_dead_code_cgu = true;
323+
}
324+
307325
pub fn mangle_name(human_readable_name: &str) -> String {
308326
// We generate a 80 bit hash from the name. This should be enough to
309327
// avoid collisions and is still reasonably short for filenames.
@@ -404,9 +422,11 @@ impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
404422
// The size estimate is not relevant to the hash
405423
size_estimate: _,
406424
primary: _,
425+
is_code_coverage_dead_code_cgu,
407426
} = *self;
408427

409428
name.hash_stable(hcx, hasher);
429+
is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher);
410430

411431
let mut items: Vec<(Fingerprint, _)> = items
412432
.iter()

compiler/rustc_middle/src/query/mod.rs

-10
Original file line numberDiff line numberDiff line change
@@ -386,16 +386,6 @@ rustc_queries! {
386386
storage(ArenaCacheSelector<'tcx>)
387387
}
388388

389-
/// Returns the name of the file that contains the function body, if instrumented for coverage.
390-
query covered_file_name(key: DefId) -> Option<Symbol> {
391-
desc {
392-
|tcx| "retrieving the covered file name, if instrumented, for `{}`",
393-
tcx.def_path_str(key)
394-
}
395-
storage(ArenaCacheSelector<'tcx>)
396-
cache_on_disk_if { key.is_local() }
397-
}
398-
399389
/// Returns the `CodeRegions` for a function that has instrumented coverage, in case the
400390
/// function was optimized out before codegen, and before being added to the Coverage Map.
401391
query covered_code_regions(key: DefId) -> Vec<&'tcx mir::coverage::CodeRegion> {

compiler/rustc_mir_transform/src/coverage/query.rs

-20
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use rustc_span::def_id::DefId;
99
/// A `query` provider for retrieving coverage information injected into MIR.
1010
pub(crate) fn provide(providers: &mut Providers) {
1111
providers.coverageinfo = |tcx, def_id| coverageinfo(tcx, def_id);
12-
providers.covered_file_name = |tcx, def_id| covered_file_name(tcx, def_id);
1312
providers.covered_code_regions = |tcx, def_id| covered_code_regions(tcx, def_id);
1413
}
1514

@@ -137,25 +136,6 @@ fn coverageinfo<'tcx>(tcx: TyCtxt<'tcx>, instance_def: ty::InstanceDef<'tcx>) ->
137136
coverage_visitor.info
138137
}
139138

140-
fn covered_file_name(tcx: TyCtxt<'_>, def_id: DefId) -> Option<Symbol> {
141-
if tcx.is_mir_available(def_id) {
142-
let body = mir_body(tcx, def_id);
143-
for bb_data in body.basic_blocks().iter() {
144-
for statement in bb_data.statements.iter() {
145-
if let StatementKind::Coverage(box ref coverage) = statement.kind {
146-
if let Some(code_region) = coverage.code_region.as_ref() {
147-
if is_inlined(body, statement) {
148-
continue;
149-
}
150-
return Some(code_region.file_name);
151-
}
152-
}
153-
}
154-
}
155-
}
156-
return None;
157-
}
158-
159139
fn covered_code_regions<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Vec<&'tcx CodeRegion> {
160140
let body = mir_body(tcx, def_id);
161141
body.basic_blocks()

compiler/rustc_monomorphize/src/partitioning/mod.rs

+34
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,40 @@ pub fn partition<'tcx>(
201201
partitioner.internalize_symbols(cx, &mut post_inlining);
202202
}
203203

204+
let instrument_dead_code =
205+
tcx.sess.instrument_coverage() && !tcx.sess.instrument_coverage_except_unused_functions();
206+
207+
if instrument_dead_code {
208+
assert!(
209+
post_inlining.codegen_units.len() > 0,
210+
"There must be at least one CGU that code coverage data can be generated in."
211+
);
212+
213+
// Find the smallest CGU that has exported symbols and put the dead
214+
// function stubs in that CGU. We look for exported symbols to increase
215+
// the likelihood the linker won't throw away the dead functions.
216+
// FIXME(#92165): In order to truly resolve this, we need to make sure
217+
// the object file (CGU) containing the dead function stubs is included
218+
// in the final binary. This will probably require forcing these
219+
// function symbols to be included via `-u` or `/include` linker args.
220+
let mut cgus: Vec<_> = post_inlining.codegen_units.iter_mut().collect();
221+
cgus.sort_by_key(|cgu| cgu.size_estimate());
222+
223+
let dead_code_cgu = if let Some(cgu) = cgus
224+
.into_iter()
225+
.rev()
226+
.filter(|cgu| cgu.items().iter().any(|(_, (linkage, _))| *linkage == Linkage::External))
227+
.next()
228+
{
229+
cgu
230+
} else {
231+
// If there are no CGUs that have externally linked items,
232+
// then we just pick the first CGU as a fallback.
233+
&mut post_inlining.codegen_units[0]
234+
};
235+
dead_code_cgu.make_code_coverage_dead_code_cgu();
236+
}
237+
204238
// Finally, sort by codegen unit name, so that we get deterministic results.
205239
let PostInliningPartitioning {
206240
codegen_units: mut result,

src/test/run-make-fulldeps/coverage-reports/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ endif
6464
# if and when we allow `llvm-cov` to produce results for multiple files. Note, the path separators
6565
# appear to be normalized to `/` in those files, thankfully.)
6666
LLVM_COV_IGNORE_FILES=\
67-
--ignore-filename-regex='(uses_crate.rs|uses_inline_crate.rs)'
67+
--ignore-filename-regex='(uses_crate.rs|uses_inline_crate.rs|unused_mod.rs)'
6868

6969
all: $(patsubst $(SOURCEDIR)/lib/%.rs,%,$(wildcard $(SOURCEDIR)/lib/*.rs)) $(patsubst $(SOURCEDIR)/%.rs,%,$(wildcard $(SOURCEDIR)/*.rs))
7070

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1| 0|pub fn never_called_function() {
2+
2| 0| println!("I am never called");
3+
3| 0|}
4+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pub fn never_called_function() {
2+
println!("I am never called");
3+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#[path = "lib/unused_mod_helper.rs"]
2+
mod unused_module;
3+
4+
fn main() {
5+
println!("hello world!");
6+
}

0 commit comments

Comments
 (0)