rust-lang · alexcrichton · Oct 25, 2022 · Oct 21, 2022 · Oct 20, 2022 · Oct 21, 2022
diff --git a/Cargo.toml b/Cargo.toml
@@ -130,3 +130,8 @@ edition = '2018'
 name = "concurrent-panics"
 required-features = ["std"]
 harness = false
+
+[[test]]
+name = "current-exe-mismatch"
+required-features = ["std"]
+harness = false
diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs
@@ -184,6 +184,8 @@ cfg_if::cfg_if! {
     ))] {
         mod libs_dl_iterate_phdr;
         use libs_dl_iterate_phdr::native_libraries;
+        #[path = "gimli/parse_running_mmaps_unix.rs"]
+        mod parse_running_mmaps;
     } else if #[cfg(target_env = "libnx")] {
         mod libs_libnx;
         use libs_libnx::native_libraries;

diff --git a/src/symbolize/gimli/libs_dl_iterate_phdr.rs b/src/symbolize/gimli/libs_dl_iterate_phdr.rs
@@ -17,6 +17,19 @@ pub(super) fn native_libraries() -> Vec<Library> {
     return ret;
 }
 
+/// Best-effort guess at the path of the main program's executable.
+///
+/// Looks for the first memory mapping that contains `base_addr` and has a
+/// non-empty pathname. If the mappings cannot be parsed, or no such mapping
+/// exists, falls back to `env::current_exe()`, and to an empty string if that
+/// fails as well.
+fn infer_current_exe(base_addr: usize) -> OsString {
+    let mapped_path = super::parse_running_mmaps::parse_maps()
+        .ok()
+        .and_then(|entries| {
+            entries
+                .iter()
+                .find(|entry| entry.ip_matches(base_addr) && !entry.pathname().is_empty())
+                .map(|entry| entry.pathname().clone())
+        });
+
+    match mapped_path {
+        Some(path) => path,
+        None => env::current_exe().map(|e| e.into()).unwrap_or_default(),
+    }
+}
+
 // `info` should be a valid pointers.
 // `vec` should be a valid pointer to a `std::Vec`.
 unsafe extern "C" fn callback(
@@ -28,8 +41,12 @@ unsafe extern "C" fn callback(
     let libs = &mut *(vec as *mut Vec<Library>);
     let is_main_prog = info.dlpi_name.is_null() || *info.dlpi_name == 0;
     let name = if is_main_prog {
+        // The man page for dl_iterate_phdr says that the first object visited by
+        // callback is the main program; so the first time we encounter a
+        // nameless entry, we can assume it's the main program and try to infer its
+        // path. After that, we can no longer make that assumption, and we use an
+        // empty string.
         if libs.is_empty() {
-            env::current_exe().map(|e| e.into()).unwrap_or_default()
+            infer_current_exe(info.dlpi_addr as usize)
         } else {
             OsString::new()
         }

diff --git a/src/symbolize/gimli/parse_running_mmaps_unix.rs b/src/symbolize/gimli/parse_running_mmaps_unix.rs
@@ -0,0 +1,152 @@
+// Note: This file is only currently used on targets that call out to the code
+// in `mod libs_dl_iterate_phdr` (e.g. linux, freebsd, ...); it may be more
+// general purpose, but it hasn't been tested elsewhere.
+
+use super::mystd::fs::File;
+use super::mystd::io::{BufRead, BufReader};
+use super::mystd::str::FromStr;
+use super::{OsString, Vec};
+
+/// One parsed line of the process memory-map listing (see the `FromStr` impl
+/// for the expected line format).
+#[derive(PartialEq, Eq, Debug)]
+pub(super) struct MapsEntry {
+    /// start (inclusive) and limit (exclusive) of address range.
+    address: (usize, usize),
+    /// The permissions for the entry.
+    ///
+    /// r = read
+    /// w = write
+    /// x = execute
+    /// s = shared
+    /// p = private (copy on write)
+    perms: [char; 4],
+    /// Offset into the file (or "whatever").
+    offset: usize,
+    /// device (major, minor)
+    dev: (usize, usize),
+    /// inode on the device. 0 indicates that no inode is associated with the
+    /// memory region (e.g. uninitialized data aka BSS).
+    inode: usize,
+    /// Usually the file backing the mapping.
+    ///
+    /// Note: The man page for proc includes a note about "coordination" by
+    /// using readelf to see the Offset field in ELF program headers. pnkfelix
+    /// is not yet sure if that is intended to be a comment on pathname, or what
+    /// form/purpose such coordination is meant to have.
+    ///
+    /// There are also some pseudo-paths:
+    /// "[stack]": The initial process's (aka main thread's) stack.
+    /// "[stack:<tid>]": a specific thread's stack. (This was only present for
+    /// a limited range of Linux versions; it was determined to be too
+    /// expensive to provide.)
+    /// "[vdso]": Virtual dynamically linked shared object
+    /// "[heap]": The process's heap
+    ///
+    /// The pathname can be blank, which means it is an anonymous mapping
+    /// obtained via mmap.
+    ///
+    /// Newlines in pathname are replaced with an octal escape sequence.
+    ///
+    /// The pathname may have "(deleted)" appended onto it if the file-backed
+    /// path has been deleted.
+    ///
+    /// Note that modifications like the latter two indicated above imply that
+    /// in general the pathname may be ambiguous. (I.e. you cannot tell if the
+    /// denoted filename actually ended with the text "(deleted)", or if that
+    /// was added by the maps rendering.)
+    pathname: OsString,
+}
+
+/// Reads `/proc/self/maps` and parses every line into a `MapsEntry`.
+///
+/// # Errors
+///
+/// Returns a static message if the file cannot be opened, if a line cannot be
+/// read from it, or if any line fails to parse (the first failure aborts the
+/// whole read; no partial result is returned).
+pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> {
+    let mut v = Vec::new();
+    let proc_self_maps =
+        File::open("/proc/self/maps").map_err(|_| "couldnt open /proc/self/maps")?;
+    let proc_self_maps = BufReader::new(proc_self_maps);
+    for line in proc_self_maps.lines() {
+        let line = line.map_err(|_io_error| "couldnt read line from /proc/self/maps")?;
+        // `str::parse` goes through the `FromStr` impl for `MapsEntry`.
+        v.push(line.parse()?);
+    }
+
+    Ok(v)
+}
+
+impl MapsEntry {
+    /// The pathname column of this entry; empty when the line had none.
+    pub(super) fn pathname(&self) -> &OsString {
+        &self.pathname
+    }
+
+    /// Whether `ip` falls inside this entry's address range
+    /// (start inclusive, limit exclusive).
+    pub(super) fn ip_matches(&self, ip: usize) -> bool {
+        let (start, limit) = self.address;
+        (start..limit).contains(&ip)
+    }
+}
+
+impl FromStr for MapsEntry {
+    type Err = &'static str;
+
+    /// Parses a single line of `/proc/self/maps`.
+    ///
+    /// Format: address perms offset dev inode pathname
+    /// e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]"
+    /// e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795                  /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
+    /// e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0"
+    ///
+    /// The address range, offset and device numbers are hexadecimal; the inode
+    /// is decimal. The pathname is everything after the inode column (minus
+    /// the padding spaces), so it may itself contain spaces and may carry a
+    /// trailing " (deleted)" marker.
+    ///
+    /// # Errors
+    ///
+    /// Returns a static message naming the first field that is missing or
+    /// malformed.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        // Splits the next space-delimited field off the front of `rest`,
+        // skipping any run of padding spaces before it. Returns the field and
+        // whatever follows its terminating space, or `None` if nothing is left.
+        fn next_field(rest: &str) -> Option<(&str, &str)> {
+            let rest = rest.trim_start_matches(' ');
+            if rest.is_empty() {
+                return None;
+            }
+            Some(rest.split_once(' ').unwrap_or((rest, "")))
+        }
+
+        let (range_str, rest) = next_field(s).ok_or("Couldn't find address")?;
+        let (perms_str, rest) = next_field(rest).ok_or("Couldn't find permissions")?;
+        let (offset_str, rest) = next_field(rest).ok_or("Couldn't find offset")?;
+        let (dev_str, rest) = next_field(rest).ok_or("Couldn't find dev")?;
+        let (inode_str, rest) = next_field(rest).ok_or("Couldn't find inode")?;
+        // The pathname may be omitted, and it may contain spaces, so it is the
+        // whole remainder of the line rather than just the next token.
+        let pathname_str = rest.trim_start_matches(' ');
+
+        let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "couldnt parse hex number");
+        let address = {
+            let (start, limit) = range_str
+                .split_once('-')
+                .ok_or("Couldn't parse address range")?;
+            (hex(start)?, hex(limit)?)
+        };
+        let perms: [char; 4] = {
+            let mut chars = perms_str.chars();
+            let mut c = || chars.next().ok_or("insufficient perms");
+            let perms = [c()?, c()?, c()?, c()?];
+            if chars.next().is_some() {
+                return Err("too many perms");
+            }
+            perms
+        };
+        let offset = hex(offset_str)?;
+        let dev = {
+            let (major, minor) = dev_str.split_once(':').ok_or("Couldn't parse dev")?;
+            (hex(major)?, hex(minor)?)
+        };
+        // Unlike the other numeric columns, the inode is printed in decimal.
+        let inode = inode_str
+            .parse::<usize>()
+            .map_err(|_| "couldnt parse inode")?;
+        let pathname = pathname_str.into();
+
+        Ok(MapsEntry { address, perms, offset, dev, inode, pathname })
+    }
+}
+
+#[test]
+fn check_maps_entry_parsing() {
+    assert_eq!("ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  \
+                [vsyscall]".parse::<MapsEntry>().unwrap(),
+               MapsEntry {
+                   address: (0xffffffffff600000, 0xffffffffff601000),
+                   perms: ['-','-','x','p'],
+                   offset: 0x00000000,
+                   dev: (0x00, 0x00),
+                   inode: 0,
+                   pathname: "[vsyscall]".into(),
+               });
+
+    assert_eq!("7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795                  \
+                /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".parse::<MapsEntry>().unwrap(),
+                 MapsEntry {
+                     address: (0x7f5985f46000, 0x7f5985f48000),
+                     perms: ['r','w','-','p'],
+                     offset: 0x00039000,
+                     dev: (0x103, 0x06),
+                     inode: 76021795,
+                     pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(),
+                 });
+    assert_eq!("35b1a21000-35b1a22000 rw-p 00000000 00:00 0".parse::<MapsEntry>().unwrap(),
+                 MapsEntry {
+                     address: (0x35b1a21000, 0x35b1a22000),
+                     perms: ['r','w','-','p'],
+                     offset: 0x00000000,
+                     dev: (0x00,0x00),
+                     inode: 0,
+                     pathname: Default::default(),
+                 });
+    // A pathname containing spaces must be kept whole.
+    assert_eq!("7f5985f46000-7f5985f48000 r-xp 00000000 103:06 1234                      \
+                /tmp/dir with spaces/my app".parse::<MapsEntry>().unwrap(),
+                 MapsEntry {
+                     address: (0x7f5985f46000, 0x7f5985f48000),
+                     perms: ['r','-','x','p'],
+                     offset: 0x00000000,
+                     dev: (0x103, 0x06),
+                     inode: 1234,
+                     pathname: "/tmp/dir with spaces/my app".into(),
+                 });
+}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
@@ -0,0 +1,14 @@
+/// Reports whether the current build configuration is one where a test binary
+/// is unable to re-execute itself.
+///
+/// Some tests only make sense when they can spawn themselves as a child
+/// process; callers use this to skip such tests where that is not possible.
+pub fn cannot_reexec_the_test() -> bool {
+    // Miri does not have support for re-execing a file.
+    if cfg!(miri) {
+        return true;
+    }
+
+    // These run in docker containers on CI where they can't re-exec the test,
+    // so just skip these for CI. No other reason this can't run on those
+    // platforms though.
+    let arch_runs_in_docker_on_ci = cfg!(any(
+        target_arch = "arm",
+        target_arch = "aarch64",
+        target_arch = "s390x"
+    ));
+    cfg!(unix) && arch_runs_in_docker_on_ci
+}
diff --git a/tests/concurrent-panics.rs b/tests/concurrent-panics.rs
@@ -9,16 +9,11 @@ const PANICS: usize = 100;
 const THREADS: usize = 8;
 const VAR: &str = "__THE_TEST_YOU_ARE_LUKE";
 
+mod common;
+
 fn main() {
-    // These run in docker containers on CI where they can't re-exec the test,
-    // so just skip these for CI. No other reason this can't run on those
-    // platforms though.
-    // Miri does not have support for re-execing a file
-    if cfg!(unix)
-        && (cfg!(target_arch = "arm")
-            || cfg!(target_arch = "aarch64")
-            || cfg!(target_arch = "s390x"))
-        || cfg!(miri)
+    // If we cannot re-exec this test, there's no point in trying to do it.
+    if common::cannot_reexec_the_test()
     {
         println!("test result: ok");
         return;

diff --git a/tests/current-exe-mismatch.rs b/tests/current-exe-mismatch.rs
@@ -0,0 +1,133 @@
+// rust-lang/rust#101913: when you run your program explicitly via `ld.so`,
+// `std::env::current_exe` will return the path of *that* program, and not
+// the Rust program itself.
+
+use std::process::Command;
+use std::path::{Path, PathBuf};
+use std::io::{BufRead, BufReader};
+
+mod common;
+
+/// Entry point shared by both roles of this test: with `VAR` set in the
+/// environment the process acts as the child, otherwise as the parent.
+fn main() {
+    let is_child = std::env::var(VAR).is_ok();
+    if is_child {
+        child().unwrap();
+        return;
+    }
+
+    // The parent waits for the child and then reports the outcome: either
+    // "test result: ok", "test result: ignored", or a panic.
+    match parent() {
+        Ok(()) => println!("test result: ok"),
+        Err(EarlyExit::IgnoreTest(_)) => println!("test result: ignored"),
+        Err(EarlyExit::IoError(e)) => {
+            println!("{} parent encoutered IoError: {:?}", file!(), e);
+            panic!();
+        }
+    }
+}
+
+// Name of the environment variable that marks the re-executed child process:
+// `main` runs `child` when it is set, and `parent` sets it to "1" on the
+// command it spawns.
+const VAR: &str = "__THE_TEST_YOU_ARE_LUKE";
+
+/// Reasons the parent side of this test stops before the child has run to a
+/// successful finish.
+#[derive(Debug)]
+enum EarlyExit {
+    /// The test cannot be run meaningfully here; `main` reports it as
+    /// "test result: ignored". The string describes why (it is not printed
+    /// by `main`).
+    IgnoreTest(String),
+    /// An I/O operation failed; `main` prints the error and panics.
+    IoError(std::io::Error),
+}
+
+impl From<std::io::Error> for EarlyExit {
+    fn from(e: std::io::Error) -> Self {
+        EarlyExit::IoError(e)
+    }
+}
+
+/// Parent role: re-runs this very executable through its ELF interpreter
+/// (`ld.so`) with `VAR` set, and checks that the child exits successfully.
+///
+/// Returns `Err(EarlyExit::IgnoreTest(..))` when re-execution is not possible
+/// or the interpreter cannot be determined; panics (after dumping the child's
+/// output) when the child fails.
+fn parent() -> Result<(), EarlyExit> {
+    // If we cannot re-exec this test, there's no point in trying to do it.
+    if common::cannot_reexec_the_test() {
+        return Err(EarlyExit::IgnoreTest("(cannot reexec)".into()));
+    }
+
+    let me = std::env::current_exe().unwrap();
+    let ld_so = find_interpreter(&me)?;
+
+    // Invoke the current exe *via the interpreter*, yielding the child test.
+    //
+    // (For comparison, running the exe directly -- i.e. `Command::new(me)`
+    // with the same environment and no extra argument -- is the easy case
+    // that Rust handled fine even prior to resolution of
+    // rust-lang/rust#101913.)
+    let mut child_cmd = Command::new(ld_so);
+    child_cmd.env(VAR, "1").arg(&me);
+    let result = child_cmd.output().unwrap();
+
+    if !result.status.success() {
+        println!("stdout:\n{}", String::from_utf8_lossy(&result.stdout));
+        println!("stderr:\n{}", String::from_utf8_lossy(&result.stderr));
+        println!("code: {}", result.status);
+        panic!();
+    }
+
+    Ok(())
+}
+
+/// Child role: captures a backtrace, prints it, and asserts that at least one
+/// resolved symbol reports this source file as its filename.
+fn child() -> Result<(), EarlyExit> {
+    let bt = backtrace::Backtrace::new();
+    println!("{:?}", bt);
+
+    let my_filename = file!();
+    // Stops at the first symbol whose filename ends with this file's path;
+    // frames without symbols simply contribute nothing.
+    let found_my_name = bt
+        .frames()
+        .iter()
+        .flat_map(|frame| frame.symbols())
+        .filter_map(|sym| sym.filename())
+        .any(|filename| filename.ends_with(my_filename));
+
+    assert!(found_my_name);
+
+    Ok(())
+}
+
+// We use the `readelf` command to extract the path to the interpreter
+// requested by the binary at `me`.
+//
+// If we cannot run `readelf` for some reason (including it not being
+// installed), or if we fail to find the interpreter in its output, then we
+// give up on this test by returning `EarlyExit::IgnoreTest` (and do not treat
+// it as a test failure). An error while reading the captured output is
+// propagated as `EarlyExit::IoError`.
+fn find_interpreter(me: &Path) -> Result<PathBuf, EarlyExit> {
+    // Failing to spawn `readelf` must not panic: it is one of the "cannot
+    // readelf" cases for which the test is ignored.
+    let result = Command::new("readelf")
+        .arg("-l")
+        .arg(me)
+        .output()
+        .map_err(|_err| EarlyExit::IgnoreTest("readelf invocation failed".into()))?;
+    if result.status.success() {
+        let r = BufReader::new(&result.stdout[..]);
+        for line in r.lines() {
+            let line = line?;
+            let line = line.trim();
+            let prefix = "[Requesting program interpreter: ";
+            if let Some((_, suffix)) = line.split_once(prefix) {
+                // The path runs up to the last closing bracket on the line.
+                if let Some((found_path, _ignore_suffix)) = suffix.rsplit_once("]") {
+                    return Ok(found_path.into());
+                }
+            }
+        }
+
+        Err(EarlyExit::IgnoreTest("could not find interpreter from readelf output".into()))
+    } else {
+        Err(EarlyExit::IgnoreTest("readelf invocation failed".into()))
+    }
+}