Skip to content

Commit

Permalink
Fix issues with go binary version detect by aligning it with the go
Browse files Browse the repository at this point in the history
… cli implementation (#1605)
  • Loading branch information
ddelnano committed Jul 13, 2023
1 parent b9c61b7 commit 42177b7
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 46 deletions.
3 changes: 3 additions & 0 deletions src/stirling/obj_tools/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pl_cc_library(
"//src/common/fs:cc_library",
"//src/common/system:cc_library",
"//src/shared/types/typespb/wrapper:cc_library",
"//src/stirling/utils:cc_library",
"@com_github_serge1_elfio//:elfio",
],
)
Expand Down Expand Up @@ -119,6 +120,8 @@ pl_cc_test(
name = "go_syms_test",
srcs = ["go_syms_test.cc"],
data = [
"//src/stirling/obj_tools/testdata/go:test_binaries",
"//src/stirling/obj_tools/testdata/go:test_go_1_17_binary",
"//src/stirling/obj_tools/testdata/go:test_go_1_19_binary",
],
deps = [
Expand Down
32 changes: 26 additions & 6 deletions src/stirling/obj_tools/elf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include "src/common/base/base.h"
#include "src/stirling/obj_tools/utils.h"

using ::px::utils::u8string;

namespace px {
namespace stirling {
namespace obj_tools {
Expand Down Expand Up @@ -156,29 +158,47 @@ class ElfReader {
/**
* Returns the byte code for the symbol at the specified section.
*/
StatusOr<px::utils::u8string> SymbolByteCode(std::string_view section, const SymbolInfo& symbol);
StatusOr<u8string> SymbolByteCode(std::string_view section, const SymbolInfo& symbol);

/**
* Returns the virtual address in the ELF file of offset 0x0. Calculated by finding the first
* loadable segment and returning its virtual address minus its file offset.
*/
StatusOr<uint64_t> GetVirtualAddrAtOffsetZero();

/**
* Returns the ELF section with the corresponding name
*/
StatusOr<ELFIO::section*> SectionWithName(std::string_view section_name);

/**
* Returns the ELF type of this binary. (eg. ELFIO::ET_EXEC or ELFIO::ET_DYN).
*/
ELFIO::Elf_Half ELFType();

/**
 * Reads `length` bytes from the binary file, starting at file offset `offset`.
 *
 * @tparam TCharType single-byte character type of the returned string (defaults to the element
 *         type of u8string).
 * @param offset byte offset from the start of the file at which reading begins.
 * @param length number of bytes to read.
 * @return the bytes read, or an Internal error if the file cannot be opened, the seek fails, or
 *         the stream cannot supply `length` bytes.
 */
template <typename TCharType = u8string::value_type>
StatusOr<std::basic_string<TCharType>> BinaryByteCode(size_t offset, size_t length) {
  // The buffer is filled through a char* below, so each element must be exactly one byte wide;
  // otherwise only part of the returned string would contain file data.
  static_assert(sizeof(TCharType) == 1, "BinaryByteCode requires a single-byte character type");

  std::ifstream ifs(binary_path_, std::ios::binary);
  // Report an unopenable file explicitly instead of letting it surface as a seek failure.
  if (!ifs.is_open()) {
    return error::Internal("Failed to open binary=$0", binary_path_);
  }
  if (!ifs.seekg(offset)) {
    return error::Internal("Failed to seek position=$0 in binary=$1", offset, binary_path_);
  }

  std::basic_string<TCharType> byte_code(length, '\0');
  auto* buf = reinterpret_cast<char*>(byte_code.data());
  // read() puts the stream into a failed state when it cannot deliver all requested bytes, so a
  // short read is treated as an error rather than returning partial data.
  if (!ifs.read(buf, static_cast<std::streamsize>(length))) {
    return error::Internal("Failed to read size=$0 bytes from offset=$1 in binary=$2", length,
                           offset, binary_path_);
  }
  return byte_code;
}

private:
ElfReader() = default;

StatusOr<ELFIO::section*> SymtabSection();

/**
* Returns the ELF section with the corresponding name
*/
StatusOr<ELFIO::section*> SectionWithName(std::string_view section_name);

/**
* Locates the debug symbols for the currently loaded ELF object.
* External symbols are discovered using either the build-id or the debug-link.
Expand Down
128 changes: 92 additions & 36 deletions src/stirling/obj_tools/go_syms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@
*/

#include "src/stirling/obj_tools/go_syms.h"
#include "src/stirling/utils/binary_decoder.h"

#include <utility>

namespace px {
namespace stirling {
namespace obj_tools {

using px::utils::u8string_view;
using read_ptr_func_t = std::function<uint64_t(u8string_view)>;

// This symbol points to a static string variable that describes the Golang tool-chain version used
// to build the executable. This symbol is embedded in a Golang executable's data section.
constexpr std::string_view kGoBuildVersionSymbol = "runtime.buildVersion";
constexpr std::string_view kGoBuildVersionStrSymbol = "runtime.buildVersion.str";

namespace {

Expand All @@ -44,48 +47,101 @@ bool IsGoExecutable(ElfReader* elf_reader) {
return elf_reader->SearchTheOnlySymbol(obj_tools::kGoBuildVersionSymbol).ok();
}

// TODO(ddelnano): Between Go 1.20.2 and 1.20.4 our build version detection started failing.
// Our version detection's assumptions are very different from how Go does this internally
// and need a significant overhaul. That is being tracked in
// https://github.com/pixie-io/pixie/issues/1318 but in the meantime optimistically read the
// runtime.buildVersion.str before following our previous heuristic.
StatusOr<std::string> ReadBuildVersionDirect(ElfReader* elf_reader) {
auto str_symbol_status = elf_reader->SearchTheOnlySymbol(kGoBuildVersionStrSymbol);
if (!str_symbol_status.ok()) {
return error::NotFound("Unable to find runtime.buildVersion.str");
}
auto str_symbol = str_symbol_status.ValueOrDie();
PX_ASSIGN_OR_RETURN(auto symbol_bytecode, elf_reader->SymbolByteCode(".rodata", str_symbol));
return std::string(reinterpret_cast<const char*>(symbol_bytecode.data()),
symbol_bytecode.size() - 1);
// Name of the ELF section in which the buildinfo header is embedded.
constexpr std::string_view kGoBuildInfoSection = ".go.buildinfo";
// kGoBuildInfoMagic corresponds to "\xff Go buildinf:"
// https://github.com/golang/go/blob/1dbbafc70fd3e2c284469ab3e0936c1bb56129f6/src/debug/buildinfo/buildinfo.go#L49
// Declared const so the magic value cannot be reassigned after initialization.
const std::string_view kGoBuildInfoMagic =
    CreateStringView<char>("\xff\x20\x47\x6f\x20\x62\x75\x69\x6c\x64\x69\x6e\x66\x3a");

// Reads a Go string encoded within a buildinfo header. This function is meant to provide the same
// functionality as
// https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L244C37-L244C44
//
// The string header is read from file offset ptr_addr: a ptr_size-byte data pointer immediately
// followed by a ptr_size-byte length. The data pointer is a virtual address and is translated to
// a file offset before the string contents are read.
//
// elf_reader: reader for the binary being inspected.
// ptr_size:   width in bytes of each header field.
// ptr_addr:   file offset of the string header.
// read_ptr:   decodes one header field into an integer.
//
// Returns the string contents, or an error if any read fails or the data pointer lies below the
// virtual address that corresponds to file offset zero.
StatusOr<std::string> ReadGoString(ElfReader* elf_reader, uint64_t ptr_size, uint64_t ptr_addr,
                                   read_ptr_func_t read_ptr) {
  // Keep the raw header bytes in owning strings so their lifetime does not depend on how
  // PX_ASSIGN_OR_RETURN stores the intermediate result.
  PX_ASSIGN_OR_RETURN(auto data_addr_bytes, elf_reader->BinaryByteCode(ptr_addr, ptr_size));
  PX_ASSIGN_OR_RETURN(auto data_len_bytes,
                      elf_reader->BinaryByteCode(ptr_addr + ptr_size, ptr_size));

  PX_ASSIGN_OR_RETURN(uint64_t vaddr_offset, elf_reader->GetVirtualAddrAtOffsetZero());

  const uint64_t data_vaddr = read_ptr(data_addr_bytes);
  // Guard the unsigned subtraction below: a pointer smaller than the base address would wrap
  // around to a huge, meaningless file offset.
  if (data_vaddr < vaddr_offset) {
    return error::Internal(
        "Go string data address=$0 is below the virtual address at offset zero=$1", data_vaddr,
        vaddr_offset);
  }
  const uint64_t data_offset = data_vaddr - vaddr_offset;
  const uint64_t str_length = read_ptr(data_len_bytes);

  PX_ASSIGN_OR_RETURN(std::string go_string,
                      elf_reader->BinaryByteCode<char>(data_offset, str_length));
  return go_string;
}

StatusOr<std::string> ReadBuildVersion(ElfReader* elf_reader) {
auto direct_version_str = ReadBuildVersionDirect(elf_reader);
if (!direct_version_str.ok()) {
LOG(INFO) << absl::Substitute(
"Falling back to the runtime.buildVersion symbol for go version detection");
} else {
return direct_version_str;
// Reads the buildinfo header embedded in the .go.buildinfo ELF section in order to determine the Go
// toolchain version. This function emulates what the `go version` CLI does, as seen in
// https://github.com/golang/go/blob/cb7a091d729eab75ccfdaeba5a0605f05addf422/src/debug/buildinfo/buildinfo.go#L151-L221
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader) {
PX_ASSIGN_OR_RETURN(ELFIO::section * section, elf_reader->SectionWithName(kGoBuildInfoSection));
int offset = section->get_offset();
PX_ASSIGN_OR_RETURN(std::string_view buildInfoByteCode,
elf_reader->BinaryByteCode<char>(offset, 64 * 1024));

BinaryDecoder binary_decoder(buildInfoByteCode);

PX_CHECK_OK(binary_decoder.ExtractStringUntil(kGoBuildInfoMagic));
PX_ASSIGN_OR_RETURN(uint8_t ptr_size, binary_decoder.ExtractInt<uint8_t>());
PX_ASSIGN_OR_RETURN(uint8_t endianness, binary_decoder.ExtractInt<uint8_t>());

// If the endianness byte has its second bit (0x2) set, then the Go version immediately follows
// the 32 byte buildinfo header as a varint length-prefixed string.
if ((endianness & 0x2) != 0) {
// Skip the remaining 16 bytes of buildinfo header
PX_CHECK_OK(binary_decoder.ExtractBufIgnore(16));

PX_ASSIGN_OR_RETURN(uint64_t size, binary_decoder.ExtractUVarInt());
PX_ASSIGN_OR_RETURN(std::string_view go_version, binary_decoder.ExtractString(size));
return std::string(go_version);
}
PX_ASSIGN_OR_RETURN(ElfReader::SymbolInfo symbol,
elf_reader->SearchTheOnlySymbol(kGoBuildVersionSymbol));

// The address of this symbol points to a Golang string object.
// But the size is for the symbol table entry, not this string object.
symbol.size = sizeof(gostring);
PX_ASSIGN_OR_RETURN(utils::u8string version_code, elf_reader->SymbolByteCode(".data", symbol));
read_ptr_func_t read_ptr;
switch (endianness) {
case 0x0: {
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint32_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::LEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
}
break;
}
case 0x1:
if (ptr_size == 4) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 4>(str_view);
};
} else if (ptr_size == 8) {
read_ptr = [&](u8string_view str_view) {
return utils::BEndianBytesToInt<uint64_t, 8>(str_view);
};
} else {
return error::NotFound(absl::Substitute(
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
}
break;
default: {
auto msg =
absl::Substitute("Invalid endianness=$0, refusing to parse non go binary", endianness);
DCHECK(false) << msg;
return error::NotFound(msg);
}
}

// We can't guarantee the alignment on version_string so we make a copy into an aligned address.
gostring version_string;
std::memcpy(&version_string, version_code.data(), sizeof(version_string));
PX_ASSIGN_OR_RETURN(uint64_t vaddr_offset, elf_reader->GetVirtualAddrAtOffsetZero());

ElfReader::SymbolInfo version_symbol;
version_symbol.address = reinterpret_cast<uint64_t>(version_string.ptr);
version_symbol.size = version_string.len;
PX_ASSIGN_OR_RETURN(auto s, binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
uint64_t ptr_addr = read_ptr(s) - vaddr_offset;

PX_ASSIGN_OR_RETURN(utils::u8string str, elf_reader->SymbolByteCode(".data", version_symbol));
return std::string(reinterpret_cast<const char*>(str.data()), str.size());
return ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr);
}

StatusOr<absl::flat_hash_map<std::string, std::vector<IntfImplTypeInfo>>> ExtractGolangInterfaces(
Expand Down
2 changes: 1 addition & 1 deletion src/stirling/obj_tools/go_syms.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bool IsGoExecutable(ElfReader* elf_reader);
// elf_reader.
// TODO(yzhao): We'll use this to determine the corresponding Golang executable's TLS data
// structures and their offsets.
StatusOr<std::string> ReadBuildVersion(ElfReader* elf_reader);
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader);

// Describes a Golang type that implement an interface.
struct IntfImplTypeInfo {
Expand Down
27 changes: 25 additions & 2 deletions src/stirling/obj_tools/go_syms_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,39 @@ namespace obj_tools {
using ::testing::Field;
using ::testing::StrEq;

// Runfile-relative path of the test binary used by the BuildinfoLittleEndiani386 test, which
// expects it to report go1.13.15.
constexpr std::string_view kTestGoLittleEndiani386BinaryPath =
"src/stirling/obj_tools/testdata/go/test_go1_13_i386_binary";

// Runfile-relative path of the test binary used by the BuildinfoLittleEndian test, which expects
// it to report go1.17.13.
constexpr std::string_view kTestGoLittleEndianBinaryPath =
"src/stirling/obj_tools/testdata/go/test_go_1_17_binary";

// Runfile-relative path of the Go 1.19 test binary, used by the BuildinfoEndianAgnostic test
// (expects go1.19.10) and by the IsGoExecutable test.
constexpr std::string_view kTestGoBinaryPath =
"src/stirling/obj_tools/testdata/go/test_go_1_19_binary";

TEST(ReadBuildVersionTest, WorkingOnBasicGoBinary) {
// The "endian agnostic" case refers to where the Go version data is varint encoded
// directly within the buildinfo header. See the following reference for more details.
// https://github.com/golang/go/blob/1dbbafc70fd3e2c284469ab3e0936c1bb56129f6/src/debug/buildinfo/buildinfo.go#L184C16-L184C16
TEST(ReadGoBuildVersionTest, BuildinfoEndianAgnostic) {
const std::string kPath = px::testing::BazelRunfilePath(kTestGoBinaryPath);
ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader, ElfReader::Create(kPath));
ASSERT_OK_AND_ASSIGN(std::string version, ReadBuildVersion(elf_reader.get()));
ASSERT_OK_AND_ASSIGN(std::string version, ReadGoBuildVersion(elf_reader.get()));
EXPECT_THAT(version, StrEq("go1.19.10"));
}

// Verifies that the toolchain version is detected for the Go 1.17 test binary.
TEST(ReadGoBuildVersionTest, BuildinfoLittleEndian) {
  const std::string binary_path = px::testing::BazelRunfilePath(kTestGoLittleEndianBinaryPath);
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> reader, ElfReader::Create(binary_path));

  ASSERT_OK_AND_ASSIGN(std::string detected_version, ReadGoBuildVersion(reader.get()));
  EXPECT_THAT(detected_version, StrEq("go1.17.13"));
}

// Verifies that the toolchain version is detected for the i386 Go 1.13 test binary.
TEST(ReadGoBuildVersionTest, BuildinfoLittleEndiani386) {
  const std::string binary_path = px::testing::BazelRunfilePath(kTestGoLittleEndiani386BinaryPath);
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> reader, ElfReader::Create(binary_path));

  ASSERT_OK_AND_ASSIGN(std::string detected_version, ReadGoBuildVersion(reader.get()));
  EXPECT_THAT(detected_version, StrEq("go1.13.15"));
}

TEST(IsGoExecutableTest, WorkingOnBasicGoBinary) {
const std::string kPath = px::testing::BazelRunfilePath(kTestGoBinaryPath);
ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader, ElfReader::Create(kPath));
Expand Down
9 changes: 9 additions & 0 deletions src/stirling/obj_tools/testdata/go/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ filegroup(
testonly = True,
srcs = [
"sockshop_payments_service",
# This binary was built with go 1.13 for GOARCH=386. This ensures that the 32 bit little
# endian case buildinfo logic is tested
# (https://github.com/golang/go/blob/1dbbafc70fd3e2c284469ab3e0936c1bb56129f6/src/debug/buildinfo/buildinfo.go#L192-L208).
# Newer versions of go generate the endian agnostic buildinfo header
# (https://github.com/golang/go/blob/1dbbafc70fd3e2c284469ab3e0936c1bb56129f6/src/debug/buildinfo/buildinfo.go#L189-L190)
# and so it cannot be tested without compiling against an older Go version.
# These older 32 bit binaries have been the source of bugs, so this test case verifies we don't
# introduce a regression (https://github.com/pixie-io/pixie/issues/1300).
"test_go1_13_i386_binary",
":test_go_1_17_binary",
":test_go_1_18_binary",
":test_go_1_19_binary",
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ Status PopulateHTTP2DebugSymbols(DwarfReader* dwarf_reader, std::string_view ven

Status PopulateGoTLSDebugSymbols(ElfReader* elf_reader, DwarfReader* dwarf_reader,
struct go_tls_symaddrs_t* symaddrs) {
PX_ASSIGN_OR_RETURN(std::string build_version, ReadBuildVersion(elf_reader));
PX_ASSIGN_OR_RETURN(std::string build_version, ReadGoBuildVersion(elf_reader));
PX_ASSIGN_OR_RETURN(SemVer go_version, GetSemVer(build_version, false));
std::string retval0_arg = "~r1";
std::string retval1_arg = "~r2";
Expand Down

0 comments on commit 42177b7

Please sign in to comment.