Skip to content

Commit 87156e4

Browse files
committed
Handle compressed debug sections in ELF files
ELF files allow debug info sections to be compressed. The libbacktrace backend supported these compressed sections, but the Gimli backend did not. This commit adds that support to the Gimli backend. In my tests these debug info sections do not obey the alignment requirements that the object crate expects for the gABI compression header (nor can I find a source documenting any alignment requirements), so this commit additionally enables the "unaligned" feature in the upcoming version of the object crate. There is a bit of unsafe code to ensure the lifetime of the decompressed sections matches the lifetime of the mmap'd file. I don't think there is a way around this unsafe code, unless we are willing to ditch Gimli's EndianSlice for an (apparently slower) EndianReader backed by a Cow<[u8]>. Fix #342.
1 parent d4f24b1 commit 87156e4

File tree

8 files changed

+142
-44
lines changed

8 files changed

+142
-44
lines changed

.github/workflows/main.yml

+4
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ jobs:
7878
- run: cargo test --features gimli-symbolize --manifest-path crates/without_debuginfo/Cargo.toml
7979
- run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features libbacktrace
8080
- run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features gimli-symbolize
81+
- run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gabi" cargo test --features gimli-symbolize
82+
if: contains(matrix.os, 'ubuntu')
83+
- run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gnu" cargo test --features gimli-symbolize
84+
if: contains(matrix.os, 'ubuntu')
8185

8286
windows_arm64:
8387
name: Windows AArch64

Cargo.toml

+4-3
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,12 @@ compiler_builtins = { version = '0.1.2', optional = true }
3939
# Optional dependencies enabled through the `gimli-symbolize` feature, do not
4040
# use these features directly.
4141
addr2line = { version = "0.12.0", optional = true, default-features = false }
42+
flate2 = { version = "1.0.14", optional = true }
4243
[dependencies.object]
43-
version = "0.19"
44+
git = "https://github.com/gimli-rs/object.git"
4445
optional = true
4546
default-features = false
46-
features = ['read_core', 'elf', 'macho', 'pe']
47+
features = ['read_core', 'elf', 'macho', 'pe', 'unaligned']
4748

4849
[target.'cfg(windows)'.dependencies]
4950
winapi = { version = "0.3.3", optional = true }
@@ -71,7 +72,7 @@ std = []
7172
# be affected by feature selection here. Also note that it's highly unlikely you
7273
# want to configure this. If you're having trouble getting backtraces it's
7374
# likely best to open an issue.
74-
gimli-symbolize = ["addr2line", "object", "std"]
75+
gimli-symbolize = ["addr2line", "flate2", "object", "std"]
7576
libbacktrace = ["backtrace-sys/backtrace-sys"]
7677

7778
#=======================================

src/symbolize/gimli.rs

+20-16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use self::gimli::read::EndianSlice;
88
use self::gimli::LittleEndian as Endian;
99
use self::mmap::Mmap;
10+
use self::stash::Stash;
1011
use crate::symbolize::ResolveWhat;
1112
use crate::types::BytesOrWideString;
1213
use crate::SymbolName;
@@ -20,6 +21,7 @@ use std::fs::File;
2021
use std::path::Path;
2122
use std::prelude::v1::*;
2223

24+
mod stash;
2325
#[cfg(windows)]
2426
#[path = "gimli/mmap_windows.rs"]
2527
mod mmap;
@@ -38,44 +40,46 @@ struct Mapping {
3840
// 'static lifetime is a lie to hack around lack of support for self-referential structs.
3941
cx: Context<'static>,
4042
_map: Mmap,
43+
_stash: Stash,
4144
}
4245

43-
fn cx<'data>(object: Object<'data>) -> Option<Context<'data>> {
44-
fn load_section<'data, S>(obj: &Object<'data>) -> S
46+
fn cx<'data>(stash: &'data Stash, object: Object<'data>) -> Option<Context<'data>> {
47+
fn load_section<'data, S>(stash: &'data Stash, obj: &Object<'data>) -> S
4548
where
4649
S: gimli::Section<gimli::EndianSlice<'data, Endian>>,
4750
{
48-
let data = obj.section(S::section_name()).unwrap_or(&[]);
51+
let data = obj.section(stash, S::section_name()).unwrap_or(&[]);
4952
S::from(EndianSlice::new(data, Endian))
5053
}
5154

5255
let dwarf = addr2line::Context::from_sections(
53-
load_section(&object),
54-
load_section(&object),
55-
load_section(&object),
56-
load_section(&object),
57-
load_section(&object),
58-
load_section(&object),
59-
load_section(&object),
60-
load_section(&object),
61-
load_section(&object),
56+
load_section(stash, &object),
57+
load_section(stash, &object),
58+
load_section(stash, &object),
59+
load_section(stash, &object),
60+
load_section(stash, &object),
61+
load_section(stash, &object),
62+
load_section(stash, &object),
63+
load_section(stash, &object),
64+
load_section(stash, &object),
6265
gimli::EndianSlice::new(&[], Endian),
6366
)
6467
.ok()?;
6568
Some(Context { dwarf, object })
6669
}
6770

6871
macro_rules! mk {
69-
(Mapping { $map:expr, $inner:expr }) => {{
72+
(Mapping { $map:expr, $inner:expr, $stash:expr }) => {{
7073
use crate::symbolize::gimli::{Context, Mapping, Mmap};
7174

72-
fn assert_lifetimes<'a>(_: &'a Mmap, _: &Context<'a>) {}
73-
assert_lifetimes(&$map, &$inner);
75+
fn assert_lifetimes<'a>(_: &'a Mmap, _: &Context<'a>, _: &'a Stash) {}
76+
assert_lifetimes(&$map, &$inner, &$stash);
7477
Mapping {
7578
// Convert to 'static lifetimes since the symbols should
76-
// only borrow `map` and we're preserving `map` below.
79+
// only borrow `map` and `stash` and we're preserving them below.
7780
cx: unsafe { core::mem::transmute::<Context<'_>, Context<'static>>($inner) },
7881
_map: $map,
82+
_stash: $stash,
7983
}
8084
}};
8185
}

src/symbolize/gimli/coff.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::{Mapping, Path, Vec};
1+
use super::{Mapping, Path, Stash, Vec};
22
use object::pe::{ImageDosHeader, ImageSymbol};
33
use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, SectionTable};
44
use object::read::StringTable;
@@ -13,8 +13,9 @@ use std::convert::TryFrom;
1313
impl Mapping {
1414
pub fn new(path: &Path) -> Option<Mapping> {
1515
let map = super::mmap(path)?;
16-
let cx = super::cx(Object::parse(&map)?)?;
17-
Some(mk!(Mapping { map, cx }))
16+
let stash = Stash::new();
17+
let cx = super::cx(&stash, Object::parse(&map)?)?;
18+
Some(mk!(Mapping { map, cx, stash }))
1819
}
1920
}
2021

@@ -66,7 +67,7 @@ impl<'a> Object<'a> {
6667
})
6768
}
6869

69-
pub fn section(&self, name: &str) -> Option<&'a [u8]> {
70+
pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> {
7071
Some(
7172
self.sections
7273
.section_by_name(self.strings, name.as_bytes())?

src/symbolize/gimli/elf.rs

+69-14
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use super::{Mapping, Path, Vec};
2-
use object::read::elf::{FileHeader, SectionHeader, SectionTable, Sym};
1+
use super::{Mapping, Stash, Path, Vec};
2+
use object::elf::{ELFCOMPRESS_ZLIB, SHF_COMPRESSED};
3+
use object::read::elf::{CompressionHeader, FileHeader, SectionHeader, SectionTable, Sym};
34
use object::read::StringTable;
4-
use object::{Bytes, NativeEndian};
5+
use object::{BigEndian, Bytes, NativeEndian};
56

67
#[cfg(target_pointer_width = "32")]
78
type Elf = object::elf::FileHeader32<NativeEndian>;
@@ -11,8 +12,9 @@ type Elf = object::elf::FileHeader64<NativeEndian>;
1112
impl Mapping {
1213
pub fn new(path: &Path) -> Option<Mapping> {
1314
let map = super::mmap(path)?;
14-
let cx = super::cx(Object::parse(&map)?)?;
15-
Some(mk!(Mapping { map, cx }))
15+
let stash = Stash::new();
16+
let cx = super::cx(&stash, Object::parse(&map)?)?;
17+
Some(mk!(Mapping { map, cx, stash }))
1618
}
1719
}
1820

@@ -87,15 +89,59 @@ impl<'a> Object<'a> {
8789
})
8890
}
8991

90-
pub fn section(&self, name: &str) -> Option<&'a [u8]> {
91-
Some(
92-
self.sections
93-
.section_by_name(self.endian, name.as_bytes())?
94-
.1
95-
.data(self.endian, self.data)
96-
.ok()?
97-
.0,
98-
)
92+
pub fn section(&self, stash: &'a Stash, name: &str) -> Option<&'a [u8]> {
93+
if let Some(section) = self.section_header(name) {
94+
let mut data = section.data(self.endian, self.data).ok()?;
95+
96+
// Check for DWARF-standard (gABI) compression, i.e., as generated
97+
// by ld's `--compress-debug-sections=zlib-gabi` flag.
98+
let flags: u64 = section.sh_flags(self.endian).into();
99+
if (flags & u64::from(SHF_COMPRESSED)) == 0 {
100+
// Not compressed.
101+
return Some(data.0);
102+
}
103+
104+
let header = data.read::<<Elf as FileHeader>::CompressionHeader>().ok()?;
105+
if header.ch_type(self.endian) != ELFCOMPRESS_ZLIB {
106+
// Zlib compression is the only known type.
107+
return None;
108+
}
109+
if header.ch_addralign(self.endian) != 1 {
110+
// We don't presently decompress into an aligned buffer, so be
111+
// defensive and ensure there are no alignment requirements on
112+
// the decompressed data. Empirical evidence suggests that debug
113+
// sections don't have alignment requirements.
114+
return None;
115+
}
116+
let size = header.ch_size(self.endian) as usize;
117+
let buf = decompress_zlib(data.0, size)?;
118+
println!("stashing for {}", name);
119+
return Some(stash.stash(buf));
120+
}
121+
122+
// Check for the nonstandard GNU compression format, i.e., as generated
123+
// by ld's `--compress-debug-sections=zlib-gnu` flag.
124+
if !name.starts_with(".debug_") {
125+
return None;
126+
}
127+
let zdebug_name = format!(".zdebug_{}", &name[7..]);
128+
if let Some(section) = self.section_header(&zdebug_name) {
129+
let mut data = section.data(self.endian, self.data).ok()?;
130+
if data.read_bytes(8).ok()?.0 != b"ZLIB\0\0\0\0" {
131+
return None;
132+
}
133+
let size = data.read::<object::U32Bytes<_>>().ok()?.get(BigEndian);
134+
let buf = decompress_zlib(data.0, size as usize)?;
135+
return Some(stash.stash(buf));
136+
}
137+
138+
None
139+
}
140+
141+
fn section_header(&self, name: &str) -> Option<&<Elf as FileHeader>::SectionHeader> {
142+
self.sections
143+
.section_by_name(self.endian, name.as_bytes())
144+
.map(|(_index, section)| section)
99145
}
100146

101147
pub fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> {
@@ -112,3 +158,12 @@ impl<'a> Object<'a> {
112158
}
113159
}
114160
}
161+
162+
fn decompress_zlib(data: &[u8], size: usize) -> Option<Vec<u8>> {
163+
let mut buf = Vec::with_capacity(size);
164+
let header_expected = true;
165+
flate2::Decompress::new(header_expected)
166+
.decompress_vec(data, &mut buf, flate2::FlushDecompress::Finish)
167+
.ok()?;
168+
Some(buf)
169+
}

src/symbolize/gimli/macho.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::{Mapping, Path, Vec};
1+
use super::{Mapping, Path, Stash, Vec};
22
use core::convert::TryInto;
33
use object::macho;
44
use object::read::macho::{MachHeader, Nlist, Section, Segment as _};
@@ -49,8 +49,9 @@ impl Mapping {
4949
// Looks like nothing matched our UUID, so let's at least return our own
5050
// file. This should have the symbol table for at least some
5151
// symbolication purposes.
52-
let inner = super::cx(Object::parse(macho, endian, data)?)?;
53-
return Some(mk!(Mapping { map, inner }));
52+
let stash = Stash::new();
53+
let inner = super::cx(&stash, Object::parse(macho, endian, data)?)?;
54+
return Some(mk!(Mapping { map, inner, stash }));
5455

5556
fn load_dsym(dir: &Path, uuid: [u8; 16]) -> Option<Mapping> {
5657
for entry in dir.read_dir().ok()? {
@@ -62,8 +63,9 @@ impl Mapping {
6263
if entry_uuid != uuid {
6364
continue;
6465
}
65-
if let Some(cx) = Object::parse(macho, endian, data).and_then(super::cx) {
66-
return Some(mk!(Mapping { map, cx }));
66+
let stash = Stash::new();
67+
if let Some(cx) = Object::parse(macho, endian, data).and_then(|o| super::cx(stash, o)) {
68+
return Some(mk!(Mapping { map, cx, stash }));
6769
}
6870
}
6971

@@ -175,7 +177,7 @@ impl<'a> Object<'a> {
175177
})
176178
}
177179

178-
pub fn section(&self, name: &str) -> Option<&'a [u8]> {
180+
pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> {
179181
let name = name.as_bytes();
180182
let dwarf = self.dwarf?;
181183
let section = dwarf.into_iter().find(|section| {

src/symbolize/gimli/mmap_unix.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ impl Mmap {
2222
if ptr == libc::MAP_FAILED {
2323
return None;
2424
}
25-
Some(Mmap { ptr, len })
25+
Some(Mmap {
26+
ptr,
27+
len,
28+
})
2629
}
2730
}
2831

src/symbolize/gimli/stash.rs

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
use std::cell::UnsafeCell;
2+
use std::vec::Vec;
3+
4+
pub struct Stash {
5+
/// Additional byte vectors that need to live as long as the mmap.
6+
buffers: UnsafeCell<Vec<Vec<u8>>>,
7+
}
8+
9+
impl Stash {
10+
pub fn new() -> Stash {
11+
Stash {
12+
buffers: UnsafeCell::new(Vec::new()),
13+
}
14+
}
15+
16+
/// Takes ownership of `buf` and returns a reference to its contents that
17+
/// lives as long as the `Stash` does.
18+
pub fn stash(&self, buf: Vec<u8>) -> &[u8] {
19+
// SAFETY: this is the only function that ever constructs a mutable
20+
// reference to `self.buffers`.
21+
let buffers = unsafe { &mut *self.buffers.get() };
22+
let i = buffers.len();
23+
buffers.push(buf);
24+
// SAFETY: we never remove elements from `self.buffers`, so a reference
25+
// to the data inside any buffer will live as long as the `Stash` does.
26+
&buffers[i]
27+
}
28+
}

0 commit comments

Comments
 (0)