From 5af532b0d968d99fafa325ac1f4c44a27ad1f1f5 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 10 Jun 2020 01:31:25 -0400 Subject: [PATCH] Handle compressed debug sections in ELF files ELF files allow debug info sections to be compressed. The libbacktrace backend supported these compressed sections, but the Gimli backend did not. This commit adds that support to the Gimli backend. In my tests (with the BFD linker, lld, and gold) these debug info sections do not obey the alignment requirements that the object crate expects for the gABI compression header, so this commit additionally enables the "unaligned" feature in the upcoming version of the object crate. There is a bit of unsafe to ensure the lifetime of the decompressed sections matches the lifetime of the mmap'd file. I don't think there is a way around this unsafe code, unless we are willing to ditch Gimli's EndianSlice for an (apparently slower) EndianReader backed by a Cow<[u8]>. Fix #342. --- .github/workflows/main.yml | 4 ++ Cargo.toml | 7 +-- src/symbolize/gimli.rs | 36 ++++++++------- src/symbolize/gimli/coff.rs | 9 ++-- src/symbolize/gimli/elf.rs | 89 ++++++++++++++++++++++++++++++------ src/symbolize/gimli/macho.rs | 16 ++++--- src/symbolize/gimli/stash.rs | 28 ++++++++++++ 7 files changed, 146 insertions(+), 43 deletions(-) create mode 100644 src/symbolize/gimli/stash.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 14a830dd6..bccbbd5a0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -78,6 +78,10 @@ jobs: - run: cargo test --features gimli-symbolize --manifest-path crates/without_debuginfo/Cargo.toml - run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features libbacktrace - run: cargo test --manifest-path crates/line-tables-only/Cargo.toml --features gimli-symbolize + - run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gabi" cargo test --features gimli-symbolize + if: contains(matrix.os, 'ubuntu') + - 
run: RUSTFLAGS="-C link-arg=-Wl,--compress-debug-sections=zlib-gnu" cargo test --features gimli-symbolize + if: contains(matrix.os, 'ubuntu') windows_arm64: name: Windows AArch64 diff --git a/Cargo.toml b/Cargo.toml index 66f431d9d..ed6638b49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,11 +39,12 @@ compiler_builtins = { version = '0.1.2', optional = true } # Optional dependencies enabled through the `gimli-symbolize` feature, do not # use these features directly. addr2line = { version = "0.12.0", optional = true, default-features = false } +miniz_oxide = { version = "0.3.7", optional = true } [dependencies.object] -version = "0.19" +git = "https://github.com/gimli-rs/object.git" optional = true default-features = false -features = ['read_core', 'elf', 'macho', 'pe'] +features = ['read_core', 'elf', 'macho', 'pe', 'unaligned'] [target.'cfg(windows)'.dependencies] winapi = { version = "0.3.3", optional = true } @@ -71,7 +72,7 @@ std = [] # be affected by feature selection here. Also note that it's highly unlikely you # want to configure this. If you're having trouble getting backtraces it's # likely best to open an issue. -gimli-symbolize = ["addr2line", "object", "std"] +gimli-symbolize = ["addr2line", "miniz_oxide", "object", "std"] libbacktrace = ["backtrace-sys/backtrace-sys"] #======================================= diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs index 9f1ed9679..cedb89913 100644 --- a/src/symbolize/gimli.rs +++ b/src/symbolize/gimli.rs @@ -7,6 +7,7 @@ use self::gimli::read::EndianSlice; use self::gimli::LittleEndian as Endian; use self::mmap::Mmap; +use self::stash::Stash; use crate::symbolize::ResolveWhat; use crate::types::BytesOrWideString; use crate::SymbolName; @@ -26,6 +27,7 @@ mod mmap; #[cfg(unix)] #[path = "gimli/mmap_unix.rs"] mod mmap; +mod stash; const MAPPINGS_CACHE_SIZE: usize = 4; @@ -38,27 +40,28 @@ struct Mapping { // 'static lifetime is a lie to hack around lack of support for self-referential structs. 
cx: Context<'static>, _map: Mmap, + _stash: Stash, } -fn cx<'data>(object: Object<'data>) -> Option> { - fn load_section<'data, S>(obj: &Object<'data>) -> S +fn cx<'data>(stash: &'data Stash, object: Object<'data>) -> Option> { + fn load_section<'data, S>(stash: &'data Stash, obj: &Object<'data>) -> S where S: gimli::Section>, { - let data = obj.section(S::section_name()).unwrap_or(&[]); + let data = obj.section(stash, S::section_name()).unwrap_or(&[]); S::from(EndianSlice::new(data, Endian)) } let dwarf = addr2line::Context::from_sections( - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), - load_section(&object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), + load_section(stash, &object), gimli::EndianSlice::new(&[], Endian), ) .ok()?; @@ -66,16 +69,17 @@ fn cx<'data>(object: Object<'data>) -> Option> { } macro_rules! mk { - (Mapping { $map:expr, $inner:expr }) => {{ + (Mapping { $map:expr, $inner:expr, $stash:expr }) => {{ use crate::symbolize::gimli::{Context, Mapping, Mmap}; - fn assert_lifetimes<'a>(_: &'a Mmap, _: &Context<'a>) {} - assert_lifetimes(&$map, &$inner); + fn assert_lifetimes<'a>(_: &'a Mmap, _: &Context<'a>, _: &'a Stash) {} + assert_lifetimes(&$map, &$inner, &$stash); Mapping { // Convert to 'static lifetimes since the symbols should - // only borrow `map` and we're preserving `map` below. + // only borrow `map` and `stash` and we're preserving them below. 
cx: unsafe { core::mem::transmute::, Context<'static>>($inner) }, _map: $map, + _stash: $stash, } }}; } diff --git a/src/symbolize/gimli/coff.rs b/src/symbolize/gimli/coff.rs index ce96bb9d1..8c9d7c861 100644 --- a/src/symbolize/gimli/coff.rs +++ b/src/symbolize/gimli/coff.rs @@ -1,4 +1,4 @@ -use super::{Mapping, Path, Vec}; +use super::{Mapping, Path, Stash, Vec}; use object::pe::{ImageDosHeader, ImageSymbol}; use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, SectionTable}; use object::read::StringTable; @@ -13,8 +13,9 @@ use std::convert::TryFrom; impl Mapping { pub fn new(path: &Path) -> Option { let map = super::mmap(path)?; - let cx = super::cx(Object::parse(&map)?)?; - Some(mk!(Mapping { map, cx })) + let stash = Stash::new(); + let cx = super::cx(&stash, Object::parse(&map)?)?; + Some(mk!(Mapping { map, cx, stash })) } } @@ -66,7 +67,7 @@ impl<'a> Object<'a> { }) } - pub fn section(&self, name: &str) -> Option<&'a [u8]> { + pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> { Some( self.sections .section_by_name(self.strings, name.as_bytes())? 
diff --git a/src/symbolize/gimli/elf.rs b/src/symbolize/gimli/elf.rs index eb0ac88f5..ce703328d 100644 --- a/src/symbolize/gimli/elf.rs +++ b/src/symbolize/gimli/elf.rs @@ -1,7 +1,9 @@ -use super::{Mapping, Path, Vec}; -use object::read::elf::{FileHeader, SectionHeader, SectionTable, Sym}; +use super::{Mapping, Path, Stash, Vec}; +use object::elf::{ELFCOMPRESS_ZLIB, SHF_COMPRESSED}; +use object::read::elf::{CompressionHeader, FileHeader, SectionHeader, SectionTable, Sym}; use object::read::StringTable; -use object::{Bytes, NativeEndian}; +use object::{BigEndian, Bytes, NativeEndian}; +use std::io::Cursor; #[cfg(target_pointer_width = "32")] type Elf = object::elf::FileHeader32; @@ -11,8 +13,9 @@ type Elf = object::elf::FileHeader64; impl Mapping { pub fn new(path: &Path) -> Option { let map = super::mmap(path)?; - let cx = super::cx(Object::parse(&map)?)?; - Some(mk!(Mapping { map, cx })) + let stash = Stash::new(); + let cx = super::cx(&stash, Object::parse(&map)?)?; + Some(mk!(Mapping { map, cx, stash })) } } @@ -87,15 +90,53 @@ impl<'a> Object<'a> { }) } - pub fn section(&self, name: &str) -> Option<&'a [u8]> { - Some( - self.sections - .section_by_name(self.endian, name.as_bytes())? - .1 - .data(self.endian, self.data) - .ok()? - .0, - ) + pub fn section(&self, stash: &'a Stash, name: &str) -> Option<&'a [u8]> { + if let Some(section) = self.section_header(name) { + let mut data = section.data(self.endian, self.data).ok()?; + + // Check for DWARF-standard (gABI) compression, i.e., as generated + // by ld's `--compress-debug-sections=zlib-gabi` flag. + let flags: u64 = section.sh_flags(self.endian).into(); + if (flags & u64::from(SHF_COMPRESSED)) == 0 { + // Not compressed. + return Some(data.0); + } + + let header = data.read::<::CompressionHeader>().ok()?; + if header.ch_type(self.endian) != ELFCOMPRESS_ZLIB { + // Zlib compression is the only known type. 
+ return None; + } + let size = header.ch_size(self.endian) as usize; + let buf = stash.allocate(size); + decompress_zlib(data.0, buf)?; + return Some(buf); + } + + // Check for the nonstandard GNU compression format, i.e., as generated + // by ld's `--compress-debug-sections=zlib-gnu` flag. + if !name.starts_with(".debug_") { + return None; + } + let zdebug_name = format!(".zdebug_{}", &name[7..]); + if let Some(section) = self.section_header(&zdebug_name) { + let mut data = section.data(self.endian, self.data).ok()?; + if data.read_bytes(8).ok()?.0 != b"ZLIB\0\0\0\0" { + return None; + } + let size = data.read::>().ok()?.get(BigEndian) as usize; + let buf = stash.allocate(size); + decompress_zlib(data.0, buf)?; + return Some(buf); + } + + None + } + + fn section_header(&self, name: &str) -> Option<&::SectionHeader> { + self.sections + .section_by_name(self.endian, name.as_bytes()) + .map(|(_index, section)| section) } pub fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> { @@ -112,3 +153,23 @@ impl<'a> Object<'a> { } } } + +fn decompress_zlib(input: &[u8], output: &mut [u8]) -> Option<()> { + use miniz_oxide::inflate::core::inflate_flags::{ + TINFL_FLAG_PARSE_ZLIB_HEADER, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF, + }; + use miniz_oxide::inflate::core::{decompress, DecompressorOxide}; + use miniz_oxide::inflate::TINFLStatus; + + let (status, in_read, out_read) = decompress( + &mut DecompressorOxide::new(), + input, + &mut Cursor::new(output), + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | TINFL_FLAG_PARSE_ZLIB_HEADER, + ); + if status == TINFLStatus::Done && in_read == input.len() && out_read == output.len() { + Some(()) + } else { + None + } +} diff --git a/src/symbolize/gimli/macho.rs b/src/symbolize/gimli/macho.rs index 5e8ed205e..f34f1814f 100644 --- a/src/symbolize/gimli/macho.rs +++ b/src/symbolize/gimli/macho.rs @@ -1,4 +1,4 @@ -use super::{Mapping, Path, Vec}; +use super::{Mapping, Path, Stash, Vec}; use core::convert::TryInto; use object::macho; 
use object::read::macho::{MachHeader, Nlist, Section, Segment as _}; @@ -49,8 +49,9 @@ impl Mapping { // Looks like nothing matched our UUID, so let's at least return our own // file. This should have the symbol table for at least some // symbolication purposes. - let inner = super::cx(Object::parse(macho, endian, data)?)?; - return Some(mk!(Mapping { map, inner })); + let stash = Stash::new(); + let inner = super::cx(&stash, Object::parse(macho, endian, data)?)?; + return Some(mk!(Mapping { map, inner, stash })); fn load_dsym(dir: &Path, uuid: [u8; 16]) -> Option { for entry in dir.read_dir().ok()? { @@ -62,8 +63,11 @@ impl Mapping { if entry_uuid != uuid { continue; } - if let Some(cx) = Object::parse(macho, endian, data).and_then(super::cx) { - return Some(mk!(Mapping { map, cx })); + let stash = Stash::new(); + if let Some(cx) = + Object::parse(macho, endian, data).and_then(|o| super::cx(&stash, o)) + { + return Some(mk!(Mapping { map, cx, stash })); } } @@ -175,7 +179,7 @@ impl<'a> Object<'a> { }) } - pub fn section(&self, name: &str) -> Option<&'a [u8]> { + pub fn section(&self, _: &Stash, name: &str) -> Option<&'a [u8]> { let name = name.as_bytes(); let dwarf = self.dwarf?; let section = dwarf.into_iter().find(|section| { diff --git a/src/symbolize/gimli/stash.rs b/src/symbolize/gimli/stash.rs new file mode 100644 index 000000000..615f847c8 --- /dev/null +++ b/src/symbolize/gimli/stash.rs @@ -0,0 +1,28 @@ +use std::cell::UnsafeCell; +use std::vec::Vec; + +/// A simple arena allocator for byte buffers. +pub struct Stash { + buffers: UnsafeCell>>, +} + +impl Stash { + pub fn new() -> Stash { + Stash { + buffers: UnsafeCell::new(Vec::new()), + } + } + + /// Allocates a buffer of the specified size and returns a mutable reference + /// to it. + pub fn allocate(&self, size: usize) -> &mut [u8] { + // SAFETY: this is the only function that ever constructs a mutable + // reference to `self.buffers`. 
+ let buffers = unsafe { &mut *self.buffers.get() }; + let i = buffers.len(); + buffers.push(vec![0; size]); + // SAFETY: we never remove elements from `self.buffers`, so a reference + // to the data inside any buffer will live as long as `self` does. + &mut buffers[i] + } +}