Projects
Mega:24.03:SP1:Everything
openjdk-1.8.0
_service:tar_scm:8242181-Show-source-informatio...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:8242181-Show-source-information-when-printing-native.patch of Package openjdk-1.8.0
From 24dedc988ac599b3191f6a69c1bce35fcc6bf748 Mon Sep 17 00:00:00 2001 From: eapen <zhangyipeng7@huawei.com> Date: Thu, 15 Dec 2022 20:05:20 +0800 Subject: [PATCH 23/33] I68TO2: 8242181: Show source information when printing native stack traces in hs_err files --- hotspot/src/share/vm/runtime/globals.hpp | 3 + hotspot/src/share/vm/utilities/debug.cpp | 9 + hotspot/src/share/vm/utilities/decoder.cpp | 9 + hotspot/src/share/vm/utilities/decoder.hpp | 17 + hotspot/src/share/vm/utilities/decoder_elf.cpp | 39 + hotspot/src/share/vm/utilities/decoder_elf.hpp | 2 + hotspot/src/share/vm/utilities/elfFile.cpp | 1510 +++++++++++++++++++- hotspot/src/share/vm/utilities/elfFile.hpp | 732 +++++++++- hotspot/src/share/vm/utilities/nativeCallStack.cpp | 11 +- hotspot/src/share/vm/utilities/vmError.cpp | 6 + hotspot/src/share/vm/utilities/vmError.hpp | 3 + jdk/test/jdk/java/dwarf/TestDwarf.java | 240 ++++ 12 files changed, 2563 insertions(+), 18 deletions(-) create mode 100644 jdk/test/jdk/java/dwarf/TestDwarf.java diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 64d40e0..d1e3cda 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -546,6 +546,9 @@ class CommandLineFlags { develop(bool, CleanChunkPoolAsync, falseInEmbedded, \ "Clean the chunk pool asynchronously") \ \ + develop(intx, TraceDwarfLevel, 0, \ + "Debug levels for the dwarf parser") \ + \ /* Temporary: See 6948537 */ \ experimental(bool, UseMemSetInBOT, true, \ "(Unstable) uses memset in BOT updates in GC code") \ diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp index 8cea16d..6a9310e 100644 --- a/hotspot/src/share/vm/utilities/debug.cpp +++ b/hotspot/src/share/vm/utilities/debug.cpp @@ -51,6 +51,7 @@ #include "utilities/events.hpp" #include "utilities/top.hpp" #include "utilities/vmError.hpp" +#include "utilities/decoder.hpp" #ifdef TARGET_OS_FAMILY_linux # include 
"os_linux.inline.hpp" #endif @@ -751,7 +752,15 @@ void print_native_stack(outputStream* st, frame fr, Thread* t, char* buf, int bu int count = 0; while (count++ < StackPrintLimit) { fr.print_on_error(st, buf, buf_size); + + char filename[128]; + int line_no; + if (Decoder::get_source_info(fr.pc(), filename, sizeof(filename), &line_no, count != 1)) { + st->print(" (%s:%d)", filename, line_no); + } + st->cr(); + // Compiled code may use EBP register on x86 so it looks like // non-walkable C frame. Use frame.sender() for java frames. if (t && t->is_Java_thread()) { diff --git a/hotspot/src/share/vm/utilities/decoder.cpp b/hotspot/src/share/vm/utilities/decoder.cpp index 7ed913a..ae58bf5 100644 --- a/hotspot/src/share/vm/utilities/decoder.cpp +++ b/hotspot/src/share/vm/utilities/decoder.cpp @@ -143,6 +143,15 @@ bool Decoder::can_decode_C_frame_in_vm() { return decoder->can_decode_C_frame_in_vm(); } +bool Decoder::get_source_info(address pc, char* filename, size_t filename_len, int* line, bool is_pc_after_call) { + if (VMError::is_error_reported_in_current_thread()) { + return get_error_handler_instance()->get_source_info(pc, filename, filename_len, line, is_pc_after_call); + } else { + MutexLockerEx locker(shared_decoder_lock(), Mutex::_no_safepoint_check_flag); + return get_shared_instance()->get_source_info(pc, filename, filename_len, line, is_pc_after_call); + } +} + /* * Shutdown shared decoder and replace it with * _do_nothing_decoder. Do nothing with error handler diff --git a/hotspot/src/share/vm/utilities/decoder.hpp b/hotspot/src/share/vm/utilities/decoder.hpp index c6c09e3..e83b87f 100644 --- a/hotspot/src/share/vm/utilities/decoder.hpp +++ b/hotspot/src/share/vm/utilities/decoder.hpp @@ -68,6 +68,11 @@ public: return (status > 0); } + // Get filename and line number information. 
+ virtual bool get_source_info(address pc, char* filename, size_t filename_len, int* line, bool is_pc_after_call) { + return false; + } + protected: decoder_status _decoder_status; }; @@ -97,6 +102,11 @@ public: virtual bool can_decode_C_frame_in_vm() const { return false; } + + // Get filename and line number information. + virtual bool get_source_info(address pc, char* filename, size_t filename_len, int* line, bool is_pc_after_call) { + return false; + } }; @@ -107,6 +117,13 @@ public: static bool demangle(const char* symbol, char* buf, int buflen); static bool can_decode_C_frame_in_vm(); + // Attempts to retrieve source file name and line number associated with a pc. + // If filename != NULL, points to a buffer of size filename_len which will receive the + // file name. File name will be silently truncated if output buffer is too small. + // If is_pc_after_call is true, then pc is treated as pointing to the next instruction + // after a call. The source information for the call instruction is fetched in that case. 
+ static bool get_source_info(address pc, char* filename, size_t filename_len, int* line, bool is_pc_after_call = false); + // shutdown shared instance static void shutdown(); protected: diff --git a/hotspot/src/share/vm/utilities/decoder_elf.cpp b/hotspot/src/share/vm/utilities/decoder_elf.cpp index 9730883..bb72ce1 100644 --- a/hotspot/src/share/vm/utilities/decoder_elf.cpp +++ b/hotspot/src/share/vm/utilities/decoder_elf.cpp @@ -73,4 +73,43 @@ ElfFile* ElfDecoder::get_elf_file(const char* filepath) { return file; } + +bool ElfDecoder::get_source_info(address pc, char* filename, size_t filename_len, int* line, bool is_pc_after_call) { + assert(filename != NULL && filename_len > 0 && line != NULL, "Argument error"); + filename[0] = '\0'; + *line = -1; + + char filepath[JVM_MAXPATHLEN]; + filepath[JVM_MAXPATHLEN - 1] = '\0'; + int offset_in_library = -1; + if (!os::dll_address_to_library_name(pc, filepath, sizeof(filepath), &offset_in_library)) { + // Method not found. offset_in_library should not overflow. + DWARF_LOG_ERROR("Did not find library for address " INTPTR_FORMAT, p2i(pc)) + return false; + } + + if (filepath[JVM_MAXPATHLEN - 1] != '\0') { + DWARF_LOG_ERROR("File path is too large to fit into buffer of size %d", JVM_MAXPATHLEN); + return false; + } + + const uint32_t unsigned_offset_in_library = (uint32_t)offset_in_library; + + ElfFile* file = get_elf_file(filepath); + if (file == NULL) { + return false; + } + DWARF_LOG_INFO("##### Find filename and line number for offset " PTR32_FORMAT " in library %s #####", + unsigned_offset_in_library, filepath); + + if (!file->get_source_info(unsigned_offset_in_library, filename, filename_len, line, is_pc_after_call)) { + return false; + } + + DWARF_LOG_SUMMARY("pc: " INTPTR_FORMAT ", offset: " PTR32_FORMAT ", filename: %s, line: %u", + p2i(pc), offset_in_library, filename, *line); + DWARF_LOG_INFO("\n") // To structure the debug output better. 
+ return true; +} + #endif // !_WINDOWS && !__APPLE__ diff --git a/hotspot/src/share/vm/utilities/decoder_elf.hpp b/hotspot/src/share/vm/utilities/decoder_elf.hpp index e92c958..5551f42 100644 --- a/hotspot/src/share/vm/utilities/decoder_elf.hpp +++ b/hotspot/src/share/vm/utilities/decoder_elf.hpp @@ -48,6 +48,8 @@ public: return false; } + bool get_source_info(address pc, char* buf, size_t buflen, int* line, bool is_pc_after_call); + private: ElfFile* get_elf_file(const char* filepath); diff --git a/hotspot/src/share/vm/utilities/elfFile.cpp b/hotspot/src/share/vm/utilities/elfFile.cpp index ac943bd..81bd441 100644 --- a/hotspot/src/share/vm/utilities/elfFile.cpp +++ b/hotspot/src/share/vm/utilities/elfFile.cpp @@ -32,12 +32,41 @@ #include <new> #include "memory/allocation.inline.hpp" +#include "memory/resourceArea.hpp" #include "utilities/decoder.hpp" #include "utilities/elfFile.hpp" #include "utilities/elfFuncDescTable.hpp" #include "utilities/elfStringTable.hpp" #include "utilities/elfSymbolTable.hpp" +const char* ElfFile::USR_LIB_DEBUG_DIRECTORY = "/usr/lib/debug"; + +bool FileReader::read(void* buf, size_t size) { + assert(buf != NULL, "no buffer"); + assert(size > 0, "no space"); + return fread(buf, size, 1, _fd) == 1; +} + +size_t FileReader::read_buffer(void* buf, size_t size) { + assert(buf != NULL, "no buffer"); + assert(size > 0, "no space"); + return fread(buf, 1, size, _fd); +} + +bool FileReader::set_position(long offset) { + return fseek(_fd, offset, SEEK_SET) == 0; +} + +MarkedFileReader::MarkedFileReader(FILE* fd) : FileReader(fd) { + _marked_pos = ftell(fd); +} + +MarkedFileReader::~MarkedFileReader() { + if (_marked_pos != -1) { + set_position(_marked_pos); + } +} + ElfFile::ElfFile(const char* filepath) { assert(filepath, "null file path"); @@ -47,6 +76,8 @@ ElfFile::ElfFile(const char* filepath) { m_funcDesc_table = NULL; m_next = NULL; m_status = NullDecoder::no_error; + m_shdr_string_table = NULL; + m_dwarf_file = NULL; int len = 
strlen(filepath) + 1; m_filepath = (const char*)os::malloc(len * sizeof(char), mtInternal); @@ -83,6 +114,16 @@ ElfFile::~ElfFile() { if (m_next != NULL) { delete m_next; } + + if (m_shdr_string_table != NULL) { + delete m_shdr_string_table; + } + + if (m_dwarf_file != NULL) { + delete m_dwarf_file; + m_dwarf_file = NULL; + } + }; @@ -128,7 +169,12 @@ bool ElfFile::load_tables() { m_status = NullDecoder::out_of_memory; return false; } - add_string_table(table); + if (index == m_elfHdr.e_shstrndx) { + assert(m_shdr_string_table == NULL, "Only set once"); + m_shdr_string_table = table; + } else { + add_string_table(table); + } } else if (shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM) { // symbol tables ElfSymbolTable* table = new (std::nothrow) ElfSymbolTable(m_file, shdr); @@ -270,4 +316,1466 @@ bool ElfFile::specifies_noexecstack() { } #endif +bool ElfFile::get_source_info(const uint32_t offset_in_library, char* filename, const size_t filename_len, + int* line, bool is_pc_after_call) { + ResourceMark rm; + if (!load_dwarf_file()) { + // Some ELF libraries do not provide separate .debuginfo files. Check if the current ELF file has the required + // DWARF sections. If so, treat the current ELF file as DWARF file. + if (!is_valid_dwarf_file()) { + DWARF_LOG_ERROR("Failed to load DWARF file for library %s or find DWARF sections directly inside it.", m_filepath); + return false; + } + DWARF_LOG_INFO("No separate .debuginfo file for library %s. It already contains the required DWARF sections.", + m_filepath); + if (!create_new_dwarf_file(m_filepath)) { + return false; + } + } + + // Store result in filename and line pointer. 
+ if (!m_dwarf_file->get_filename_and_line_number(offset_in_library, filename, filename_len, line, is_pc_after_call)) { + DWARF_LOG_ERROR("Failed to retrieve file and line number information for %s at offset: " PTR32_FORMAT, m_filepath, + offset_in_library); + return false; + } + return true; +} + +bool ElfFile::is_valid_dwarf_file() const { + Elf_Shdr shdr; + return read_section_header(".debug_abbrev", shdr) && read_section_header(".debug_aranges", shdr) + && read_section_header(".debug_info", shdr) && read_section_header(".debug_line", shdr); +} + +// (1) Load the debuginfo file from the path specified in this ELF file in the .gnu_debuglink section. +// Adapted from Serviceability Agent. +bool ElfFile::load_dwarf_file() { + if (m_dwarf_file != NULL) { + return true; // Already opened. + } + + DebugInfo debug_info; + if (!read_debug_info(&debug_info)) { + DWARF_LOG_DEBUG("Could not read debug info from .gnu_debuglink section"); + return false; + } + + DwarfFilePath dwarf_file_path(debug_info); + return load_dwarf_file_from_same_directory(dwarf_file_path) + || load_dwarf_file_from_env_var_path(dwarf_file_path) + || load_dwarf_file_from_debug_sub_directory(dwarf_file_path) + || load_dwarf_file_from_usr_lib_debug(dwarf_file_path); +} + +// Read .gnu_debuglink section which contains: +// Filename (null terminated) + 0-3 padding bytes (to 4 byte align) + CRC (4 bytes) +bool ElfFile::read_debug_info(DebugInfo* debug_info) const { + Elf_Shdr shdr; + if (!read_section_header(".gnu_debuglink", shdr)) { + DWARF_LOG_DEBUG("Failed to read the .gnu_debuglink header."); + return false; + } + + if (shdr.sh_size % 4 != 0) { + DWARF_LOG_ERROR(".gnu_debuglink section is not 4 byte aligned (i.e. 
file is corrupted)"); + return false; + } + + MarkedFileReader mfd(fd()); + if (!mfd.has_mark() || !mfd.set_position(m_elfHdr.e_shoff)) { + return false; + } + + uint64_t filename_max_len = shdr.sh_size - DebugInfo::CRC_LEN; + mfd.set_position(shdr.sh_offset); + if (!mfd.read(&debug_info->_dwarf_filename, filename_max_len)) { + return false; + } + + if (debug_info->_dwarf_filename[filename_max_len - 1] != '\0') { + // Filename not null-terminated (i.e. overflowed). + DWARF_LOG_ERROR("Dwarf filename is not null-terminated"); + return false; + } + + return mfd.read(&debug_info->_crc, DebugInfo::CRC_LEN); +} + +bool ElfFile::DwarfFilePath::set(const char* src) { + int bytes_written = jio_snprintf(_path, MAX_DWARF_PATH_LENGTH, "%s", src); + if (bytes_written < 0 || bytes_written >= MAX_DWARF_PATH_LENGTH) { + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + update_null_terminator_index(); + return check_valid_path(); // Sanity check +} + +bool ElfFile::DwarfFilePath::set_after_last_slash(const char* src) { + char* last_slash = strrchr(_path, '/'); + if (last_slash == NULL) { + // Should always find a slash. 
+ return false; + } + + uint16_t index_after_slash = (uint16_t)(last_slash + 1 - _path); + return copy_to_path_index(index_after_slash, src); +} + +bool ElfFile::DwarfFilePath::append(const char* src) { + return copy_to_path_index(_null_terminator_index, src); +} + +bool ElfFile::DwarfFilePath::copy_to_path_index(uint16_t index_in_path, const char* src) { + if (index_in_path >= MAX_DWARF_PATH_LENGTH - 1) { + // Should not override '\0' at _path[MAX_DWARF_PATH_LENGTH - 1] + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + + uint16_t max_len = MAX_DWARF_PATH_LENGTH - index_in_path; + int bytes_written = jio_snprintf(_path + index_in_path, max_len, "%s", src); + if (bytes_written < 0 || bytes_written >= max_len) { + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + update_null_terminator_index(); + return check_valid_path(); // Sanity check +} + +// Try to load the dwarf file from the same directory as the library file. +bool ElfFile::load_dwarf_file_from_same_directory(DwarfFilePath& dwarf_file_path) { + if (!dwarf_file_path.set(m_filepath) + || !dwarf_file_path.set_filename_after_last_slash()) { + return false; + } + return open_valid_debuginfo_file(dwarf_file_path); +} + +// Try to load the dwarf file from a user specified path in environmental variable _JVM_DWARF_PATH. 
+bool ElfFile::load_dwarf_file_from_env_var_path(DwarfFilePath& dwarf_file_path) { + const char* dwarf_path_from_env = ::getenv("_JVM_DWARF_PATH"); + if (dwarf_path_from_env != NULL) { + DWARF_LOG_DEBUG("_JVM_DWARF_PATH: %s", dwarf_path_from_env); + return (load_dwarf_file_from_env_path_folder(dwarf_file_path, dwarf_path_from_env, "/lib/server/") + || load_dwarf_file_from_env_path_folder(dwarf_file_path, dwarf_path_from_env, "/lib/") + || load_dwarf_file_from_env_path_folder(dwarf_file_path, dwarf_path_from_env, "/bin/") + || load_dwarf_file_from_env_path_folder(dwarf_file_path, dwarf_path_from_env, "/")); + } + return false; +} + +bool ElfFile::load_dwarf_file_from_env_path_folder(DwarfFilePath& dwarf_file_path, const char* dwarf_path_from_env, + const char* folder) { + if (!dwarf_file_path.set(dwarf_path_from_env) + || !dwarf_file_path.append(folder) + || !dwarf_file_path.append(dwarf_file_path.filename())) { + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + return open_valid_debuginfo_file(dwarf_file_path); +} + +// Try to load the dwarf file from a subdirectory named .debug within the directory of the library file. +bool ElfFile::load_dwarf_file_from_debug_sub_directory(DwarfFilePath& dwarf_file_path) { + if (!dwarf_file_path.set(m_filepath) + || !dwarf_file_path.set_after_last_slash(".debug/") + || !dwarf_file_path.append(dwarf_file_path.filename())) { + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + return open_valid_debuginfo_file(dwarf_file_path); +} + +// Try to load the dwarf file from /usr/lib/debug + the full pathname. 
+bool ElfFile::load_dwarf_file_from_usr_lib_debug(DwarfFilePath& dwarf_file_path) { + if (!dwarf_file_path.set(USR_LIB_DEBUG_DIRECTORY) + || !dwarf_file_path.append(m_filepath) + || !dwarf_file_path.set_filename_after_last_slash()) { + DWARF_LOG_ERROR("Dwarf file path buffer is too small"); + return false; + } + return open_valid_debuginfo_file(dwarf_file_path); +} + +bool ElfFile::read_section_header(const char* name, Elf_Shdr& hdr) const { + if (m_shdr_string_table == NULL) { + assert(false, "section header string table should be loaded"); + return false; + } + const uint8_t buf_len = 24; + char buf[buf_len]; + size_t len = strlen(name) + 1; + if (len > buf_len) { + DWARF_LOG_ERROR("Section header name buffer is too small: Required: %zu, Found: %d", len, buf_len); + return false; + } + + MarkedFileReader mfd(fd()); + if (!mfd.has_mark() || !mfd.set_position(m_elfHdr.e_shoff)) { + return false; + } + + for (int index = 0; index < m_elfHdr.e_shnum; index++) { + if (!mfd.read((void*)&hdr, sizeof(hdr))) { + return false; + } + if (m_shdr_string_table->string_at(hdr.sh_name, buf, buf_len)) { + if (strncmp(buf, name, buf_len) == 0) { + return true; + } + } + } + return false; +} + +// Taken from https://sourceware.org/gdb/current/onlinedocs/gdb/Separate-Debug-Files.html#Separate-Debug-Files +static const uint32_t crc32_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 
0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 
0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d + }; + +bool ElfFile::open_valid_debuginfo_file(const DwarfFilePath& dwarf_file_path) { + if (m_dwarf_file != NULL) { + // Already opened. + return true; + } + + const char* filepath = dwarf_file_path.path(); + FILE* file = fopen(filepath, "r"); + if (file == NULL) { + DWARF_LOG_DEBUG("Could not open dwarf file %s ", filepath); + return false; + } + + uint32_t file_crc = get_file_crc(file); + fclose(file); // Close it here to reopen it again when the DwarfFile object is created below. + + if (dwarf_file_path.crc() != file_crc) { + // Must be equal, otherwise the file is corrupted. + DWARF_LOG_ERROR("CRC did not match. Expected: " PTR32_FORMAT ", found: " PTR32_FORMAT, dwarf_file_path.crc(), + file_crc); + return false; + } + return create_new_dwarf_file(filepath); +} + +uint32_t ElfFile::get_file_crc(FILE* const file) { + uint32_t file_crc = 0; + uint8_t buffer[8 * 1024]; + MarkedFileReader reader(file); + while (true) { + size_t len = reader.read_buffer(buffer, sizeof(buffer)); + if (len == 0) { + break; + } + file_crc = gnu_debuglink_crc32(file_crc, buffer, len); + } + return file_crc; +} + +// The CRC used in gnu_debuglink, retrieved from +// http://sourceware.org/gdb/current/onlinedocs/gdb/Separate-Debug-Files.html#Separate-Debug-Files. 
+uint32_t ElfFile::gnu_debuglink_crc32(uint32_t crc, uint8_t* buf, const size_t len) { + crc = ~crc; + for (uint8_t* end = buf + len; buf < end; buf++) { + crc = crc32_table[(crc ^ *buf) & 0xffu] ^ (crc >> 8u); + } + return ~crc; +} + +bool ElfFile::create_new_dwarf_file(const char* filepath) { + DWARF_LOG_SUMMARY("Open DWARF file: %s", filepath); + m_dwarf_file = new (std::nothrow) DwarfFile(filepath); + if (m_dwarf_file == NULL) { + DWARF_LOG_ERROR("Failed to create new DwarfFile object for %s.", m_filepath); + return false; + } + if (!m_dwarf_file->is_valid_dwarf_file()) { + DWARF_LOG_ERROR("Did not find required DWARF sections in %s", filepath); + return false; + } + return true; +} + +// Starting point of reading line number and filename information from the DWARF file. +bool DwarfFile::get_filename_and_line_number(const uint32_t offset_in_library, char* filename, const size_t filename_len, + int* line, const bool is_pc_after_call) { + DebugAranges debug_aranges(this); + uint32_t compilation_unit_offset = 0; // 4-bytes for 32-bit DWARF + if (!debug_aranges.find_compilation_unit_offset(offset_in_library, &compilation_unit_offset)) { + DWARF_LOG_ERROR("Failed to find .debug_info offset for the compilation unit."); + return false; + } + DWARF_LOG_INFO(".debug_info offset: " PTR32_FORMAT, compilation_unit_offset); + + CompilationUnit compilation_unit(this, compilation_unit_offset); + uint32_t debug_line_offset = 0; // 4-bytes for 32-bit DWARF + if (!compilation_unit.find_debug_line_offset(&debug_line_offset)) { + DWARF_LOG_ERROR("Failed to find .debug_line offset for the line number program."); + return false; + } + DWARF_LOG_INFO(".debug_line offset: " PTR32_FORMAT, debug_line_offset); + + LineNumberProgram line_number_program(this, offset_in_library, debug_line_offset, is_pc_after_call); + if (!line_number_program.find_filename_and_line_number(filename, filename_len, line)) { + DWARF_LOG_ERROR("Failed to process the line number program correctly."); + return 
false; + } + return true; +} + +// (2) The .debug_aranges section contains a number of entries/sets. Each set contains one or multiple address range descriptors of the +// form [beginning_address, beginning_address+length). Start reading these sets and their descriptors until we find one that contains +// 'offset_in_library'. Read the debug_info_offset field from the header of this set which defines the offset for the compilation unit. +// This process is described in section 6.1.2 of the DWARF 4 spec. +bool DwarfFile::DebugAranges::find_compilation_unit_offset(const uint32_t offset_in_library, uint32_t* compilation_unit_offset) { + if (!read_section_header()) { + DWARF_LOG_ERROR("Failed to read a .debug_aranges header."); + return false; + } + + DebugArangesSetHeader set_header; + bool found_matching_set = false; + while (_reader.has_bytes_left()) { + // Read multiple sets and therefore multiple headers. + if (!read_set_header(set_header)) { + DWARF_LOG_ERROR("Failed to read a .debug_aranges header."); + return false; + } + + if (!read_address_descriptors(offset_in_library, found_matching_set)) { + return false; + } + + if (found_matching_set) { + // Found the correct set, read the debug_info_offset from the header of this set. + DWARF_LOG_INFO(".debug_aranges offset: " PTR32_FORMAT, (uint32_t)_reader.get_position()); + *compilation_unit_offset = set_header._debug_info_offset; + return true; + } + } + + assert(false, "No address descriptor found containing offset_in_library."); + return false; +} + +bool DwarfFile::DebugAranges::read_section_header() { + Elf_Shdr shdr; + if (!_dwarf_file->read_section_header(".debug_aranges", shdr)) { + return false; + } + + _section_start_address = shdr.sh_offset; + _reader.set_max_pos(shdr.sh_offset + shdr.sh_size); + return _reader.set_position(shdr.sh_offset); +} + +// Parse set header as specified in section 6.1.2 of the DWARF 4 spec. 
+bool DwarfFile::DebugAranges::read_set_header(DebugArangesSetHeader& header) { + if (!_reader.read_dword(&header._unit_length) || header._unit_length == 0xFFFFFFFF) { + // For 64-bit DWARF, the first 32-bit value is 0xFFFFFFFF. The current implementation only supports 32-bit DWARF + // format since GCC only emits 32-bit DWARF. + DWARF_LOG_ERROR("64-bit DWARF is not supported for .debug_aranges") + return false; + } + + if (!_reader.read_word(&header._version) || header._version != 2) { + // DWARF 4 uses version 2 as specified in Appendix F of the DWARF 4 spec. + DWARF_LOG_ERROR(".debug_aranges in unsupported DWARF version %" PRIu16, header._version) + return false; + } + + if (!_reader.read_dword(&header._debug_info_offset)) { + return false; + } + + if (!_reader.read_byte(&header._address_size) || header._address_size != DwarfFile::ADDRESS_SIZE) { + // Addresses must be either 4 bytes for 32-bit architectures or 8 bytes for 64-bit architectures. + DWARF_LOG_ERROR(".debug_aranges specifies wrong address size %" PRIu8, header._address_size); + return false; + } + + if (!_reader.read_byte(&header._segment_size) || header._segment_size != 0) { + // Segment size should be 0. + DWARF_LOG_ERROR(".debug_aranges segment size is non-zero: %" PRIu8, header._segment_size); + return false; + } + + // We must align to twice the address size. + uint8_t alignment = DwarfFile::ADDRESS_SIZE * 2; + uint8_t padding = alignment - (_reader.get_position() - _section_start_address) % alignment; + return _reader.move_position(padding); +} + +bool DwarfFile::DebugAranges::read_address_descriptors(const uint32_t offset_in_library, bool& found_matching_set) { + AddressDescriptor descriptor; + do { + if (!read_address_descriptor(descriptor)) { + return false; + } + + if (does_match_offset(offset_in_library, descriptor)) { + found_matching_set = true; + return true; + } + } while (!is_terminating_entry(descriptor) && _reader.has_bytes_left()); + + // Set does not match offset_in_library. 
Continue with next. + return true; +} + +bool DwarfFile::DebugAranges::read_address_descriptor(AddressDescriptor& descriptor) { + return _reader.read_address_sized(&descriptor.beginning_address) + && _reader.read_address_sized(&descriptor.range_length); +} + +bool DwarfFile::DebugAranges::does_match_offset(const uint32_t offset_in_library, const AddressDescriptor& descriptor) { + return descriptor.beginning_address <= offset_in_library + && offset_in_library < descriptor.beginning_address + descriptor.range_length; +} + +bool DwarfFile::DebugAranges::is_terminating_entry(const AddressDescriptor& descriptor) { + return descriptor.beginning_address == 0 && descriptor.range_length == 0; +} + +// Find the .debug_line offset for the line number program by reading from the .debug_abbrev and .debug_info section. +bool DwarfFile::CompilationUnit::find_debug_line_offset(uint32_t* debug_line_offset) { + // (3a,b) + if (!read_header()) { + DWARF_LOG_ERROR("Failed to read the compilation unit header."); + return false; + } + + // (3c) Read the abbreviation code immediately following the compilation unit header which is an offset to the + // correct abbreviation table in .debug_abbrev for this compilation unit. + uint64_t abbrev_code; + if (!_reader.read_uleb128(&abbrev_code)) { + return false; + } + + DebugAbbrev debug_abbrev(_dwarf_file, this); + if (!debug_abbrev.read_section_header(_header._debug_abbrev_offset)) { + DWARF_LOG_ERROR("Failed to read the .debug_abbrev header at " PTR32_FORMAT, _header._debug_abbrev_offset); + return false; + } + if (!debug_abbrev.find_debug_line_offset(abbrev_code)) { + return false; + } + *debug_line_offset = _debug_line_offset; // Result was stored in _debug_line_offset. + return true; +} + +// (3a) Parse header as specified in section 7.5.1.1 of the DWARF 4 spec. 
+bool DwarfFile::CompilationUnit::read_header() { + Elf_Shdr shdr; + if (!_dwarf_file->read_section_header(".debug_info", shdr)) { + DWARF_LOG_ERROR("Failed to read the .debug_info section header."); + return false; + } + + if (!_reader.set_position(shdr.sh_offset + _compilation_unit_offset)) { + return false; + } + + if (!_reader.read_dword(&_header._unit_length) || _header._unit_length == 0xFFFFFFFF) { + // For 64-bit DWARF, the first 32-bit value is 0xFFFFFFFF. The current implementation only supports 32-bit DWARF + // format since GCC only emits 32-bit DWARF. + DWARF_LOG_ERROR("64-bit DWARF is not supported for .debug_info") + return false; + } + + if (!_reader.read_word(&_header._version) || _header._version != 4) { + // DWARF 4 uses version 4 as specified in Appendix F of the DWARF 4 spec. + DWARF_LOG_ERROR(".debug_info in unsupported DWARF version %" PRIu16, _header._version) + return false; + } + + // (3b) Offset into .debug_abbrev section. + if (!_reader.read_dword(&_header._debug_abbrev_offset)) { + return false; + } + + if (!_reader.read_byte(&_header._address_size) || _header._address_size != DwarfFile::ADDRESS_SIZE) { + // Addresses must be either 4 bytes for 32-bit architectures or 8 bytes for 64-bit architectures. + DWARF_LOG_ERROR(".debug_info specifies wrong address size %" PRIu8, _header._address_size); + return false; + } + + // Add because _unit_length is not included. + _reader.set_max_pos(_reader.get_position() + _header._unit_length + 4); + return true; +} + +bool DwarfFile::DebugAbbrev::read_section_header(uint32_t debug_abbrev_offset) { + Elf_Shdr shdr; + if (!_dwarf_file->read_section_header(".debug_abbrev", shdr)) { + return false; + } + + _reader.set_max_pos(shdr.sh_offset + shdr.sh_size); + if (!_reader.set_position(shdr.sh_offset + debug_abbrev_offset)) { + return false; + } + return true; +} + +// (3d) The abbreviations table for a compilation unit consists of a series of abbreviation declarations. 
Each declaration
+// specifies an abbrev code and a tag. Parse all declarations until we find the declaration which matches 'abbrev_code'.
+// Read the attribute values from the compilation unit in .debug_info by using the format described in the declaration.
+// This process is described in section 7.5 and 7.5.3 of the DWARF 4 spec.
+bool DwarfFile::DebugAbbrev::find_debug_line_offset(const uint64_t abbrev_code) {
+  DWARF_LOG_TRACE("Series of declarations [code, tag]:");
+  AbbreviationDeclaration current;
+  while (_reader.has_bytes_left()) {
+    if (!read_declaration(current)) {
+      return false;
+    }
+
+    DWARF_LOG_TRACE(" Series of attributes [name, form]:");
+    if (current._abbrev_code != abbrev_code) {
+      // Some other declaration: consume its attribute specifications and try the next declaration.
+      if (!read_attribute_specifications(false)) {
+        return false;
+      }
+      continue;
+    }
+
+    // This is the declaration the compilation unit refers to.
+    if (is_wrong_or_unsupported_format(current)) {
+      return false;
+    }
+    DWARF_LOG_INFO(".debug_abbrev offset: " PTR32_FORMAT, (uint32_t)_reader.get_position());
+    DWARF_LOG_TRACE(" Read the following attribute values from compilation unit:");
+    return read_attribute_specifications(true);
+  }
+
+  assert(false, ".debug_line offset not found");
+  return false;
+}
+
+// Reads the abbrev code, the tag and the has-children byte of the next declaration into 'declaration'.
+// An abbrev code of zero ends the declarations of this compilation unit and is reported as failure.
+bool DwarfFile::DebugAbbrev::read_declaration(DwarfFile::DebugAbbrev::AbbreviationDeclaration& declaration) {
+  if (!_reader.read_uleb128(&declaration._abbrev_code)) {
+    return false;
+  }
+
+  const bool reached_end_of_declarations = (declaration._abbrev_code == 0);
+  if (reached_end_of_declarations) {
+    // No declaration with the requested code exists for this compilation unit.
+    DWARF_LOG_ERROR("abbrev_code not found in any declaration");
+    return false;
+  }
+
+  if (!_reader.read_uleb128(&declaration._tag)) {
+    return false;
+  }
+  if (!_reader.read_byte(&declaration._has_children)) {
+    return false;
+  }
+
+  DWARF_LOG_TRACE("Code: 0x" UINT64_FORMAT_X ", Tag: 0x" UINT64_FORMAT_X, declaration._abbrev_code, declaration._tag);
+  return true;
+}
+
+// Returns true (= reject) unless the declaration is a DW_TAG_compile_unit that has children.
+bool DwarfFile::DebugAbbrev::is_wrong_or_unsupported_format(const DwarfFile::DebugAbbrev::AbbreviationDeclaration& declaration) {
+  const bool is_compile_unit = (declaration._tag == DW_TAG_compile_unit);
+  if (!is_compile_unit) {
+    // Figure 18 in section 7.5 of the DWARF 4 spec requires DW_TAG_compile_unit here. The tag could also be
+    // DW_TAG_partial_unit (0x3c) which this parser does not support at the moment.
+    DWARF_LOG_ERROR("Found unsupported tag in compilation unit: " UINT64_FORMAT_X, declaration._tag);
+    return true;
+  }
+
+  const bool has_children = (declaration._has_children == DW_CHILDREN_yes);
+  if (!has_children) {
+    DWARF_LOG_ERROR("Must have children but none specified");
+    return true;
+  }
+  return false;
+}
+
+// Walk the [name, form] pairs of one declaration. If 'is_DW_TAG_compile_unit' is false, the pairs are only consumed
+// until the terminating pair is reached. Otherwise, for every pair the matching attribute value is read from the
+// compilation unit until DW_AT_stmt_list is reached; its value is the offset of the line number program in the
+// .debug_line section and gets stored in the _debug_line_offset field of the compilation unit.
+bool DwarfFile::DebugAbbrev::read_attribute_specifications(const bool is_DW_TAG_compile_unit) {
+  AttributeSpecification spec;
+  while (_reader.has_bytes_left()) {
+    if (!read_attribute_specification(spec)) {
+      return false;
+    }
+
+    if (is_terminating_specification(spec)) {
+      // All attributes of this declaration were parsed.
+      if (!is_DW_TAG_compile_unit) {
+        // Not the declaration we are looking for: the caller continues with the next one.
+        return true;
+      }
+      DWARF_LOG_ERROR("Did not find DW_AT_stmt_list in .debug_abbrev");
+      return false;
+    }
+
+    if (!is_DW_TAG_compile_unit) {
+      continue;
+    }
+
+    // Read the attribute value from the compilation unit. DW_AT_stmt_list carries the .debug_line offset and ends
+    // the search (successfully or not); a failure to read any other attribute aborts as well.
+    const bool is_stmt_list = (spec._name == DW_AT_stmt_list);
+    const bool value_read = _compilation_unit->read_attribute_value(spec._form, is_stmt_list);
+    if (is_stmt_list || !value_read) {
+      return value_read;
+    }
+  }
+
+  assert(false, ".debug_abbrev section appears to be corrupted");
+  return false;
+}
+
+// Reads one [name, form] pair. The pair is traced regardless of whether both reads succeeded.
+bool DwarfFile::DebugAbbrev::read_attribute_specification(DwarfFile::DebugAbbrev::AttributeSpecification& specification) {
+  bool ok = _reader.read_uleb128(&specification._name);
+  if (ok) {
+    ok = _reader.read_uleb128(&specification._form);
+  }
+  DWARF_LOG_TRACE(" Name: 0x" UINT64_FORMAT_X ", Form: 0x" UINT64_FORMAT_X,
+                  specification._name, specification._form);
+  return ok;
+}
+
+// The pair [0, 0] terminates the attribute specifications of a declaration.
+bool DwarfFile::DebugAbbrev::is_terminating_specification(const DwarfFile::DebugAbbrev::AttributeSpecification& specification) {
+  if (specification._name != 0) {
+    return false;
+  }
+  return specification._form == 0;
+}
+
+
+// (3e) Read the actual attribute values from the compilation unit in the .debug_info section. Each attribute has an encoding
+// that specifies which values need to be read for it. This is specified in section 7.5.4 of the DWARF 4 spec.
+// If is_DW_AT_stmt_list_attribute is:
+// - False: Ignore the read attribute value.
+// - True: We are going to read the attribute value of the DW_AT_stmt_list attribute which specifies the offset into the
+//         .debug_line section for the line number program. Store this offset in the _debug_line_offset field.
+// Returns false if the form is unknown or unsupported, or if reading from or seeking in the file fails.
+bool DwarfFile::CompilationUnit::read_attribute_value(const uint64_t attribute_form, const bool is_DW_AT_stmt_list_attribute) {
+  // Reset to the stored _cur_pos of the reader since the DebugAbbrev reader changed the index into the file with its reader.
+  // If that fails we would read attribute values from an arbitrary file position, so give up.
+  if (!_reader.update_to_stored_position()) {
+    return false;
+  }
+  uint8_t next_byte = 0;
+  uint16_t next_word = 0;
+  uint32_t next_dword = 0;
+  uint64_t next_qword = 0;
+
+  switch (attribute_form) {
+    case DW_FORM_addr:
+      // Move position by the size of an address.
+      if (!_reader.move_position(DwarfFile::ADDRESS_SIZE)) {
+        return false;
+      }
+      break;
+    case DW_FORM_block2:
+      // New position: length + data length (next_word)
+      if (!_reader.read_word(&next_word) || !_reader.move_position(next_word)) {
+        return false;
+      }
+      break;
+    case DW_FORM_block4:
+      // New position: length + data length (next_dword)
+      if (!_reader.read_dword(&next_dword) || !_reader.move_position(next_dword)) {
+        return false;
+      }
+      break;
+    case DW_FORM_data2:
+    case DW_FORM_ref2:
+      if (!_reader.move_position(2)) {
+        return false;
+      }
+      break;
+    case DW_FORM_data4:
+    case DW_FORM_strp: // 4 bytes in 32-bit DWARF
+    case DW_FORM_ref_addr: // second type of reference: 4 bytes in 32-bit DWARF
+    case DW_FORM_ref4:
+      if (!_reader.move_position(4)) {
+        return false;
+      }
+      break;
+    case DW_FORM_data8:
+    case DW_FORM_ref8:
+    case DW_FORM_ref_sig8: // 64-bit type signature
+      if (!_reader.move_position(8)) {
+        return false;
+      }
+      break;
+    case DW_FORM_string:
+      if (!_reader.read_string()) {
+        return false;
+      }
+      break;
+    case DW_FORM_block:
+    case DW_FORM_exprloc:
+      // New position: length + data length (next_qword).
+      if (!_reader.read_uleb128(&next_qword) || !_reader.move_position(next_qword)) {
+        return false;
+      }
+      break;
+    case DW_FORM_block1:
+      // New position: length + data length (next_byte).
+      if (!_reader.read_byte(&next_byte) || !_reader.move_position(next_byte)) {
+        return false;
+      }
+      break;
+    case DW_FORM_data1:
+    case DW_FORM_ref1:
+    case DW_FORM_flag:
+      if (!_reader.move_position(1)) {
+        return false;
+      }
+      break;
+    case DW_FORM_flag_present:
+      // The attribute is implicitly present: no value is encoded in the debugging information entry
+      // (section 7.5.4 of the DWARF 4 spec), so there is nothing to skip.
+      break;
+    case DW_FORM_sdata:
+    case DW_FORM_udata:
+    case DW_FORM_ref_udata:
+      // The value is ignored; we only need to consume all bytes of the (s/u)leb128 number.
+      if (!_reader.read_uleb128(&next_qword)) {
+        return false;
+      }
+      break;
+    case DW_FORM_indirect:
+      // Should not be used and therefore is not supported by this parser.
+      DWARF_LOG_ERROR("DW_FORM_indirect is not supported.");
+      return false;
+    case DW_FORM_sec_offset:
+      if (is_DW_AT_stmt_list_attribute) {
+        // DW_AT_stmt_list has the DW_FORM_sec_offset attribute encoding. Store the result in _debug_line_offset.
+        // 4 bytes for 32-bit DWARF.
+        DWARF_LOG_TRACE(" Name: DW_AT_stmt_list, Form: DW_FORM_sec_offset");
+        DWARF_LOG_TRACE(" Reading .debug_line offset from compilation unit at " PTR32_FORMAT,
+                        (uint32_t)_reader.get_position());
+        if (!_reader.read_dword(&_debug_line_offset)) {
+          return false;
+        }
+      } else {
+        if (!_reader.move_position(DwarfFile::DWARF_SECTION_OFFSET_SIZE)) {
+          return false;
+        }
+      }
+      break;
+    default:
+      assert(false, "Unknown DW_FORM_* attribute encoding.");
+      return false;
+  }
+  // Reset the index into the file to the original position where the DebugAbbrev reader stopped reading before calling this method.
+  // Report a failure: otherwise the DebugAbbrev reader would continue at the wrong file position.
+  return _reader.reset_to_previous_position();
+}
+
+// Parses the line number program header and then runs the program to look up the source location. On success,
+// 'filename' (a buffer of 'filename_len' bytes) and '*line' are filled in by run_line_number_program().
+bool DwarfFile::LineNumberProgram::find_filename_and_line_number(char* filename, const size_t filename_len, int* line) {
+  if (!read_header()) {
+    DWARF_LOG_ERROR("Failed to parse the line number program header correctly.");
+    return false;
+  }
+  return run_line_number_program(filename, filename_len, line);
+}
+
+// Parsing header as specified in section 6.2.4 of DWARF 4 spec. We do not read the file_names field, yet.
+// Returns false for 64-bit DWARF, unsupported versions, invalid header fields or failed reads. On success, the reader
+// is positioned at the first opcode of the line number program and limited to the end of this unit.
+bool DwarfFile::LineNumberProgram::read_header() {
+  Elf_Shdr shdr;
+  if (!_dwarf_file->read_section_header(".debug_line", shdr)) {
+    DWARF_LOG_ERROR("Failed to read the .debug_line section header.");
+    return false;
+  }
+
+  if (!_reader.set_position(shdr.sh_offset + _debug_line_offset)) {
+    return false;
+  }
+
+  if (!_reader.read_dword(&_header._unit_length) || _header._unit_length == 0xFFFFFFFF) {
+    // For 64-bit DWARF, the first 32-bit value is 0xFFFFFFFF. The current implementation only supports 32-bit DWARF
+    // format since GCC only emits 32-bit DWARF.
+    DWARF_LOG_ERROR("64-bit DWARF is not supported for .debug_line");
+    return false;
+  }
+
+  if (!_reader.read_word(&_header._version) || _header._version < 2 || _header._version > 4) {
+    // DWARF 3 uses version 3 and DWARF 4 uses version 4 as specified in Appendix F of the DWARF 3 and 4 spec, respectively.
+    // For some reason, GCC is not following the standard here. While GCC emits DWARF 4 for the other parsed sections,
+    // it chooses a different DWARF standard for .debug_line based on the GCC version:
+    // - GCC 8 and earlier: .debug_line is in DWARF 2 format (= version 2).
+    // - GCC 9 and 10:      .debug_line is in DWARF 3 format (= version 3).
+    // - GCC 11:            .debug_line is in DWARF 4 format (= version 4).
+    DWARF_LOG_ERROR(".debug_line in unsupported DWARF version %" PRIu16, _header._version);
+    return false;
+  }
+
+  if (!_reader.read_dword(&_header._header_length)) {
+    return false;
+  }
+
+  // To ensure not to read too many bytes in case of file corruption when reading the path_names field.
+  _reader.set_max_pos(_reader.get_position() + _header._header_length);
+
+  if (!_reader.read_byte(&_header._minimum_instruction_length)) {
+    return false;
+  }
+
+  if (_header._version == 4) {
+    if (!_reader.read_byte(&_header._maximum_operations_per_instruction)) {
+      return false;
+    }
+    if (_header._maximum_operations_per_instruction == 0) {
+      // The value is used as a divisor when advancing the address and op_index registers. Reject a corrupted header
+      // instead of dividing by zero later.
+      DWARF_LOG_ERROR("Invalid maximum_operations_per_instruction: 0");
+      return false;
+    }
+  }
+
+  if (!_reader.read_byte(&_header._default_is_stmt)) {
+    return false;
+  }
+
+  if (!_reader.read_sbyte(&_header._line_base)) {
+    return false;
+  }
+
+  if (!_reader.read_byte(&_header._line_range)) {
+    return false;
+  }
+  if (_header._line_range == 0) {
+    // The value is used as a divisor when applying special opcodes and DW_LNS_const_add_pc. Reject a corrupted header
+    // instead of dividing by zero later.
+    DWARF_LOG_ERROR("Invalid line_range: 0");
+    return false;
+  }
+
+  if (!_reader.read_byte(&_header._opcode_base) || _header._opcode_base - 1 != 12) {
+    // There are 12 standard opcodes for DWARF 3 and 4.
+    DWARF_LOG_ERROR("Wrong number of opcodes: %" PRIu8, _header._opcode_base);
+    return false;
+  }
+
+  for (uint8_t i = 0; i < _header._opcode_base - 1; i++) {
+    if (!_reader.read_byte(&_header._standard_opcode_lengths[i])) {
+      return false;
+    }
+  }
+
+  // Read field include_directories which is a sequence of path names. These are terminated by a single null byte.
+  // We do not care about them, just read the strings and move on.
+  while (_reader.read_string()) { }
+
+  // Delay reading file_names until we found the correct file index in the line number program. Store the position where
+  // the file names start to parse them later. We directly jump to the line number program which starts at offset
+  // header_size (=HEADER_DESCRIPTION_BYTES + _header_length) + _debug_line_offset
+  _header._file_names_offset = _reader.get_position();
+  uint32_t header_size = LineNumberProgramHeader::HEADER_DESCRIPTION_BYTES + _header._header_length;
+  if (!_reader.set_position(shdr.sh_offset + header_size + _debug_line_offset)) {
+    return false;
+  }
+
+  // Now reset the max position to where the line number information for this compilation unit ends (i.e. where the state
+  // machine gets terminated). Add 4 bytes to the offset because the size of the _unit_length field is not included in this
+  // value.
+  _reader.set_max_pos(shdr.sh_offset + _debug_line_offset + _header._unit_length + 4);
+  return true;
+}
+
+// Create the line number information matrix as described in section 6.2 of the DWARF 4 spec. Try to find the correct entry
+// by comparing the address register belonging to each matrix row with _offset_in_library. Once it is found, we can read
+// the line number from the line register and the filename by parsing the file_names list from the header until we reach
+// the correct filename as specified by the file register.
+//
+// If space was not a problem, the .debug_line section could provide a large matrix that contains an entry for each
+// compiler instruction that contains the line number, the column number, the filename etc. But that's impractical.
+// Two techniques optimize such a matrix:
+// (1) If two offsets share the same file, line and column (and discriminator) information, the row is dropped.
+// (2) We store a stream of bytes that represent opcodes to be executed in a well-defined state machine language
+//     instead of actually storing the entire matrix row by row.
+// +// Let's consider a simple example: +// 25: int iFld = 42; +// 26: +// 27: void bar(int i) { +// 28: } +// 29: +// 30: void foo() { +// 31: bar(*iFld); +// 32: } +// +// Disassembly of foo() with source code: +// 30: void foo() { +// 0x55d132: 55 push rbp +// 0x55d133: 48 89 e5 mov rbp,rsp +// 31: bar(*iFld); +// 0x55d136: 48 8b 05 b3 ee e8 01 mov rax,QWORD PTR [rip+0x1e8eeb3] # 23ebff0 <iFld> +// 0x55d13d: 8b 00 mov eax,DWORD PTR [rax] +// 0x55d13f: 89 c7 mov edi,eax +// 0x55d141: e8 e2 ff ff ff call 55d128 <_Z3bari> +// 32: } +// 0x55d146: 90 nop +// 0x55d147: 5d pop rbp +// 0x55d148: c3 ret +// +// This would produce the following matrix for foo() where duplicated lines (0x55d133, 0x55d13d, 0x55d13f) were removed +// according to (1): +// Address: Line: Column: File: +// 0x55d132 30 12 1 +// 0x55d136 31 6 1 +// 0x55d146 32 1 1 +// +// When trying to get the line number for a PC, which is translated into an offset address x into the library file, we can either: +// - Directly find the last entry in the matrix for which address == x (there could be multiple entries with the same address). +// - If there is no matching address for x: +// 1. Find two consecutive entries in the matrix for which: address_entry_1 < x < address_entry_2. +// 2. Then take the entry of address_entry_1. +// E.g. x = 0x55d13f -> 0x55d136 < 0x55d13f < 0x55d146 -> Take entry 0x55d136. +// +// Enable logging with debug level to print the generated line number information matrix. 
+// Runs the opcodes of the line number program until a row matches _offset_in_library. On a match, '*line' is set from
+// the line register and the file name is copied into 'filename' (a buffer of 'filename_len' bytes) via
+// get_filename_from_header(). Returns false if the state object cannot be allocated, an opcode cannot be applied,
+// the file name lookup fails, or the reader runs out of bytes without a match.
+bool DwarfFile::LineNumberProgram::run_line_number_program(char* filename, const size_t filename_len, int* line) {
+  DWARF_LOG_DEBUG(" ");
+  DWARF_LOG_DEBUG("Line Number Information Matrix");
+  DWARF_LOG_DEBUG("------------------------------");
+#ifndef _LP64
+  DWARF_LOG_DEBUG("Address: Line: Column: File:");
+#else
+  DWARF_LOG_DEBUG("Address: Line: Column: File:");
+#endif
+  _state = new (std::nothrow) LineNumberProgramState(_header);
+  if (_state == NULL) {
+    DWARF_LOG_ERROR("Failed to create new LineNumberProgramState object");
+    return false;
+  }
+  // Registers of the previously appended row. does_offset_match_entry() needs them because the matching row is often
+  // the one before the row that is currently being appended.
+  uintptr_t previous_address = 0;
+  uint32_t previous_file = 0;
+  uint32_t previous_line = 0;
+  while (_reader.has_bytes_left()) {
+    if (!apply_opcode()) {
+      assert(false, "Could not apply opcode");
+      return false;
+    }
+
+    if (_state->_append_row) {
+      // Append a new line to the line number information matrix.
+      if (_state->_first_entry_in_sequence) {
+        // First entry in sequence: Check if _offset_in_library >= _state->address. If not, then all following entries
+        // belonging to this sequence cannot match our _offset_in_library because the addresses are always increasing
+        // in a sequence.
+        _state->_can_sequence_match_offset = _offset_in_library >= _state->_address;
+        _state->_first_entry_in_sequence = false;
+      }
+      if (does_offset_match_entry(previous_address, previous_file, previous_line)) {
+        // We are using an int for the line number which should never be larger than INT_MAX for any files.
+        *line = (int)_state->_line;
+        return get_filename_from_header(_state->_file, filename, filename_len);
+      }
+
+      // We do not actually store the matrix while searching the correct entry. Enable logging to print/debug it.
+      DWARF_LOG_DEBUG(INTPTR_FORMAT " %-5u %-3u %-4u",
+                      _state->_address, _state->_line, _state->_column, _state->_file);
+      previous_file = _state->_file;
+      previous_line = _state->_line;
+      previous_address = _state->_address;
+      _state->_append_row = false;
+      if (_state->_do_reset) {
+        // Current sequence terminated.
+        _state->reset_fields();
+      }
+    }
+  }
+
+  return false;
+}
+
+// Apply next opcode to update the state machine.
+// The opcode byte selects the category: 0 introduces an extended opcode, 1 to 12 are standard opcodes and everything
+// above is a special opcode. Returns false if the opcode byte or one of its operands cannot be read or if the opcode
+// is unknown.
+bool DwarfFile::LineNumberProgram::apply_opcode() {
+  uint8_t opcode;
+  if (!_reader.read_byte(&opcode)) {
+    return false;
+  }
+
+  DWARF_LOG_TRACE(" Opcode: 0x%02x ", opcode);
+  if (opcode == 0) {
+    // Extended opcodes start with a zero byte.
+    if (!apply_extended_opcode()) {
+      assert(false, "Could not apply extended opcode");
+      return false;
+    }
+  } else if (opcode <= 12) {
+    // 12 standard opcodes in DWARF 3 and 4.
+    if (!apply_standard_opcode(opcode)) {
+      assert(false, "Could not apply standard opcode");
+      return false;
+    }
+  } else {
+    // Special opcodes range from 13 until 255.
+    apply_special_opcode(opcode);
+  }
+  return true;
+}
+
+// Specified in section 6.2.5.3 of the DWARF 4 spec.
+// Reads the length and the extended opcode and applies it to the state machine. The length is read but not used
+// afterwards, so an unknown extended opcode is not skipped but treated as an error.
+bool DwarfFile::LineNumberProgram::apply_extended_opcode() {
+  uint64_t extended_opcode_length; // Does not include the already written zero byte and the length leb128.
+  uint8_t extended_opcode;
+  if (!_reader.read_uleb128(&extended_opcode_length) || !_reader.read_byte(&extended_opcode)) {
+    return false;
+  }
+
+  switch (extended_opcode) {
+    case DW_LNE_end_sequence: // No operands
+      DWARF_LOG_TRACE(" DW_LNE_end_sequence");
+      // Emit a last row for this sequence and request a reset of the registers once that row was processed.
+      _state->_end_sequence = true;
+      _state->_append_row = true;
+      _state->_do_reset = true;
+      break;
+    case DW_LNE_set_address: // 1 operand
+      if (!_reader.read_address_sized(&_state->_address)) {
+        return false;
+      }
+      DWARF_LOG_TRACE(" DW_LNE_set_address " INTPTR_FORMAT, _state->_address);
+      if (_state->_dwarf_version == 4) {
+        _state->_op_index = 0;
+      }
+      break;
+    case DW_LNE_define_file: // 4 operands
+      DWARF_LOG_TRACE(" DW_LNE_define_file");
+      // Operand 1: the file name string, which is consumed and discarded.
+      if (!_reader.read_string()) {
+        return false;
+      }
+      // Operand 2-4: uleb128 numbers we do not care about.
+      if (!_reader.read_uleb128_ignore()
+          || !_reader.read_uleb128_ignore()
+          || !_reader.read_uleb128_ignore()) {
+        return false;
+      }
+      break;
+    case DW_LNE_set_discriminator: // 1 operand
+      DWARF_LOG_TRACE(" DW_LNE_set_discriminator");
+      uint64_t discriminator;
+      // For some reason, GCC emits this opcode even for earlier versions than DWARF 4 which introduced this opcode.
+      // We need to consume it.
+      if (!_reader.read_uleb128(&discriminator, 4)) {
+        // Must be an unsigned integer as specified in section 6.2.2 of the DWARF 4 spec for the discriminator register.
+        return false;
+      }
+      _state->_discriminator = discriminator;
+      break;
+    default:
+      assert(false, "Unknown extended opcode");
+      return false;
+  }
+  return true;
+}
+
+// Specified in section 6.2.5.2 of the DWARF 4 spec.
+// Applies the standard opcode 'opcode' (1 to 12) to the state machine and consumes its operands, if any.
+// Returns false if an operand cannot be read or if 'opcode' is not a known standard opcode.
+bool DwarfFile::LineNumberProgram::apply_standard_opcode(const uint8_t opcode) {
+  switch (opcode) {
+    case DW_LNS_copy: // No operands
+      DWARF_LOG_TRACE(" DW_LNS_copy");
+      _state->_append_row = true;
+      _state->_basic_block = false;
+      _state->_prologue_end = false;
+      _state->_epilogue_begin = false;
+      if (_state->_dwarf_version == 4) {
+        _state->_discriminator = 0;
+      }
+      break;
+    case DW_LNS_advance_pc: { // 1 operand
+      uint64_t operation_advance;
+      if (!_reader.read_uleb128(&operation_advance, 4)) {
+        // Must be at most 4 bytes because the index register is only 4 bytes wide.
+        return false;
+      }
+      _state->add_to_address_register(operation_advance, _header);
+      if (_state->_dwarf_version == 4) {
+        _state->set_index_register(operation_advance, _header);
+      }
+      DWARF_LOG_TRACE(" DW_LNS_advance_pc (" INTPTR_FORMAT ")", _state->_address);
+      break;
+    }
+    case DW_LNS_advance_line: { // 1 operand
+      int64_t line;
+      if (!_reader.read_sleb128(&line, 4)) {
+        // line register is 4 bytes wide.
+        return false;
+      }
+      _state->_line += line;
+      DWARF_LOG_TRACE(" DW_LNS_advance_line (%d)", _state->_line);
+      break;
+    }
+    case DW_LNS_set_file: { // 1 operand
+      uint64_t file;
+      if (!_reader.read_uleb128(&file, 4)) {
+        // file register is 4 bytes wide.
+        return false;
+      }
+      _state->_file = file;
+      DWARF_LOG_TRACE(" DW_LNS_set_file (%u)", _state->_file);
+      break;
+    }
+    case DW_LNS_set_column: { // 1 operand
+      uint64_t column;
+      if (!_reader.read_uleb128(&column, 4)) {
+        // column register is 4 bytes wide.
+        return false;
+      }
+      _state->_column = column;
+      DWARF_LOG_TRACE(" DW_LNS_set_column (%u)", _state->_column);
+      break;
+    }
+    case DW_LNS_negate_stmt: // No operands
+      DWARF_LOG_TRACE(" DW_LNS_negate_stmt");
+      _state->_is_stmt = !_state->_is_stmt;
+      break;
+    case DW_LNS_set_basic_block: // No operands
+      DWARF_LOG_TRACE(" DW_LNS_set_basic_block");
+      _state->_basic_block = true;
+      break;
+    case DW_LNS_const_add_pc: { // No operands
+      // Update address and op_index registers by the increments of special opcode 255.
+      uint8_t adjusted_opcode_255 = 255 - _header._opcode_base;
+      uint8_t operation_advance = adjusted_opcode_255 / _header._line_range;
+      uintptr_t old_address = _state->_address;
+      _state->add_to_address_register(operation_advance, _header);
+      if (_state->_dwarf_version == 4) {
+        _state->set_index_register(operation_advance, _header);
+      }
+      DWARF_LOG_TRACE(" DW_LNS_const_add_pc (" INTPTR_FORMAT ")", _state->_address - old_address);
+      break;
+    }
+    case DW_LNS_fixed_advance_pc: { // 1 operand
+      // The operand is a plain 2-byte value (not leb128) that is added to the address register as is.
+      uint16_t operand;
+      if (!_reader.read_word(&operand)) {
+        return false;
+      }
+      _state->_address += operand;
+      _state->_op_index = 0;
+      DWARF_LOG_TRACE(" DW_LNS_fixed_advance_pc (" INTPTR_FORMAT ")", _state->_address);
+      break;
+    }
+    case DW_LNS_set_prologue_end: // No operands
+      DWARF_LOG_TRACE(" DW_LNS_set_prologue_end");
+      _state->_prologue_end = true;
+      break;
+    case DW_LNS_set_epilogue_begin: // No operands
+      DWARF_LOG_TRACE(" DW_LNS_set_epilogue_begin");
+      _state->_epilogue_begin = true;
+      break;
+    case DW_LNS_set_isa: { // 1 operand
+      uint64_t isa;
+      if (!_reader.read_uleb128(&isa, 4)) {
+        // isa register is 4 bytes wide.
+        return false;
+      }
+      _state->_isa = isa;
+      DWARF_LOG_TRACE(" DW_LNS_set_isa (%u)", _state->_isa);
+      break;
+    }
+    default:
+      assert(false, "Unknown standard opcode");
+      return false;
+  }
+  return true;
+}
+
+// Specified in section 6.2.5.1 of the DWARF 4 spec.
+// A special opcode advances the address (and, for DWARF 4, the op_index) register, adds a signed delta to the line
+// register and appends a row to the matrix.
+void DwarfFile::LineNumberProgram::apply_special_opcode(const uint8_t opcode) {
+  uintptr_t old_address = _state->_address;
+  uint32_t old_line = _state->_line;
+  uint8_t adjusted_opcode = opcode - _header._opcode_base;
+  uint8_t operation_advance = adjusted_opcode / _header._line_range;
+  _state->add_to_address_register(operation_advance, _header);
+  if (_state->_dwarf_version == 4) {
+    _state->set_index_register(operation_advance, _header);
+    _state->_discriminator = 0;
+  }
+  _state->_line += _header._line_base + (adjusted_opcode % _header._line_range);
+  DWARF_LOG_TRACE(" address += " INTPTR_FORMAT ", line += %d", _state->_address - old_address,
+                  _state->_line - old_line);
+  _state->_append_row = true;
+  _state->_basic_block = false;
+  _state->_prologue_end = false;
+  _state->_epilogue_begin = false;
+}
+
+// Decides whether the row that is about to be appended (or the previously appended row) describes
+// _offset_in_library. If the previous row is the match, its file and line are written back to the state registers.
+// Returns true once the state registers hold the file and line to report.
+bool DwarfFile::LineNumberProgram::does_offset_match_entry(const uintptr_t previous_address, const uint32_t previous_file,
+                                                           const uint32_t previous_line) {
+  if (_state->_can_sequence_match_offset) {
+    bool matches_entry_directly = _offset_in_library == _state->_address;
+    if (matches_entry_directly
+        || (_offset_in_library > previous_address && _offset_in_library < _state->_address)) { // in between two entries
+      _state->_found_match = true;
+      if (!matches_entry_directly || _is_pc_after_call) {
+        // We take the previous row in the matrix either when:
+        // - We try to match an offset that is between two entries.
+        // - We have an offset from a PC that is at a call-site in which case we need to get the line information for
+        //   the call instruction in the previous entry.
+        print_and_store_prev_entry(previous_file, previous_line);
+        return true;
+      } else if (!_reader.has_bytes_left()) {
+        // We take the current entry when this is the very last entry in the matrix (i.e. must be the right one).
+        DWARF_LOG_DEBUG("^^^ Found line for requested offset " PTR32_FORMAT " ^^^", _offset_in_library);
+        return true;
+      }
+      // Else: Exact match. We cannot take this entry because we do not know if there are more entries following this
+      //       one with the same offset (we could have multiple entries for the same address in the matrix). Continue
+      //       to parse entries. When we have the first non-exact match, then we know that the previous entry is the
+      //       correct one to take (handled in the else-if-case below). If this is the very last entry in a matrix,
+      //       we will take the current entry (handled in else-if-case above).
+    } else if (_state->_found_match) {
+      // We found an entry before with an exact match. This is now the first entry with a new offset. Pick the previous
+      // entry which matches our offset and is guaranteed to be the last entry which matches our offset (if there are
+      // multiple entries with the same offset).
+      print_and_store_prev_entry(previous_file, previous_line);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Overwrites the file and line registers with the values of the previous row and logs the match.
+void DwarfFile::LineNumberProgram::print_and_store_prev_entry(const uint32_t previous_file, const uint32_t previous_line) {
+  _state->_file = previous_file;
+  _state->_line = previous_line;
+  DWARF_LOG_DEBUG("^^^ Found line for requested offset " PTR32_FORMAT " ^^^", _offset_in_library);
+  // Also print the currently parsed entry.
+  DWARF_LOG_DEBUG(INTPTR_FORMAT " %-5u %-3u %-4u",
+                  _state->_address, _state->_line, _state->_column, _state->_file);
+}
+
+// Read field file_names from the header as specified in section 6.2.4 of the DWARF 4 spec.
+// Copies the name of the entry with index 'file_index' (entries are numbered starting at 1) into 'filename', a buffer
+// of 'filename_len' bytes. Returns false if the reader cannot be positioned at the file_names field, if reading fails
+// or if there is no entry with that index.
+bool DwarfFile::LineNumberProgram::get_filename_from_header(const uint32_t file_index, char* filename, const size_t filename_len) {
+  // We do not need to restore the position afterwards as this is the last step of parsing from the file for this compilation unit.
+  if (!_reader.set_position(_header._file_names_offset)) {
+    // Without a successful seek we would interpret unrelated bytes as file names.
+    return false;
+  }
+  uint32_t current_index = 1; // file_names start at index 1
+  while (_reader.has_bytes_left()) {
+    if (!_reader.read_string(filename, filename_len)) {
+      // Either an error while reading or we have reached the end of the file_names. Both should not happen.
+      return false;
+    }
+
+    if (current_index == file_index) {
+      // Found correct file.
+      return true;
+    }
+
+    // We don't care about these values.
+    if (!_reader.read_uleb128_ignore() // Read directory index
+        || !_reader.read_uleb128_ignore() // Read last modification of file
+        || !_reader.read_uleb128_ignore()) { // Read file length
+      return false;
+    }
+    current_index++;
+  }
+  DWARF_LOG_DEBUG("Did not find filename entry at index " UINT32_FORMAT " in .debug_line header", file_index);
+  return false;
+}
+
+// Puts all registers and the bookkeeping flags of the state machine back to their start-of-sequence values.
+void DwarfFile::LineNumberProgram::LineNumberProgramState::reset_fields() {
+  _address = 0;
+  _op_index = 0;
+  _file = 1;
+  _line = 1;
+  _column = 0;
+  _is_stmt = _initial_is_stmt;
+  _basic_block = false;
+  _end_sequence = false;
+  _prologue_end = false;
+  _epilogue_begin = false;
+  _isa = 0;
+  _discriminator = 0;
+  _append_row = false;
+  _do_reset = false;
+  _first_entry_in_sequence = true;
+  _can_sequence_match_offset = false;
+}
+
+// Defined in section 6.2.5.1 of the DWARF 4 spec.
+// The address register is left unchanged for any _dwarf_version other than 2, 3 or 4.
+void DwarfFile::LineNumberProgram::LineNumberProgramState::add_to_address_register(const uint32_t operation_advance,
+                                                                                   const LineNumberProgramHeader& header) {
+  if (_dwarf_version == 2 || _dwarf_version == 3) {
+    _address += (uintptr_t)(operation_advance * header._minimum_instruction_length);
+  } else if (_dwarf_version == 4) {
+    _address += (uintptr_t)(header._minimum_instruction_length *
+                            ((_op_index + operation_advance) / header._maximum_operations_per_instruction));
+  }
+}
+
+// Defined in section 6.2.5.1 of the DWARF 4 spec.
+void DwarfFile::LineNumberProgram::LineNumberProgramState::set_index_register(const uint32_t operation_advance,
+                                                                              const LineNumberProgramHeader& header) {
+  // The new op_index is the advanced index modulo the number of operations per instruction.
+  _op_index = (_op_index + operation_advance) % header._maximum_operations_per_instruction;
+}
+
+// Moves the reader to the absolute file position 'new_pos' and remembers it in _current_pos.
+// Negative positions are rejected without touching the file.
+bool DwarfFile::MarkedDwarfFileReader::set_position(const long new_pos) {
+  if (new_pos >= 0) {
+    _current_pos = new_pos;
+    return FileReader::set_position(new_pos);
+  }
+  return false;
+}
+
+// True while the tracked position is below the limit. A limit of -1 always yields false.
+bool DwarfFile::MarkedDwarfFileReader::has_bytes_left() const {
+  return _max_pos != -1 && _current_pos < _max_pos;
+}
+
+// Remembers the current position of the underlying file in _marked_pos and then seeks to the position
+// tracked by this reader (_current_pos).
+bool DwarfFile::MarkedDwarfFileReader::update_to_stored_position() {
+  _marked_pos = ftell(_fd);
+  return _marked_pos >= 0 && FileReader::set_position(_current_pos);
+}
+
+// Seeks back to the position remembered by update_to_stored_position().
+bool DwarfFile::MarkedDwarfFileReader::reset_to_previous_position() {
+  const bool restored = FileReader::set_position(_marked_pos);
+  return restored;
+}
+
+// Advances the tracked position by 'offset' bytes. An offset of zero succeeds without seeking.
+bool DwarfFile::MarkedDwarfFileReader::move_position(const long offset) {
+  return offset == 0 || set_position(_current_pos + offset);
+}
+
+// The read_* methods below bump the tracked position by the width of the value first and then read that many
+// bytes into '*result'.
+bool DwarfFile::MarkedDwarfFileReader::read_sbyte(int8_t* result) {
+  _current_pos += sizeof(int8_t);
+  return read(result, sizeof(int8_t));
+}
+
+bool DwarfFile::MarkedDwarfFileReader::read_byte(uint8_t* result) {
+  _current_pos += sizeof(uint8_t);
+  return read(result, sizeof(uint8_t));
+}
+
+bool DwarfFile::MarkedDwarfFileReader::read_word(uint16_t* result) {
+  _current_pos += sizeof(uint16_t);
+  return read(result, sizeof(uint16_t));
+}
+
+bool DwarfFile::MarkedDwarfFileReader::read_dword(uint32_t* result) {
+  _current_pos += sizeof(uint32_t);
+  return read(result, sizeof(uint32_t));
+}
+
+bool DwarfFile::MarkedDwarfFileReader::read_qword(uint64_t* result) {
+  _current_pos += sizeof(uint64_t);
+  return read(result, sizeof(uint64_t));
+}
+
+// Reads DwarfFile::ADDRESS_SIZE bytes into '*result'.
+bool DwarfFile::MarkedDwarfFileReader::read_address_sized(uintptr_t* result) {
+  _current_pos += DwarfFile::ADDRESS_SIZE;
+  return read(result, DwarfFile::ADDRESS_SIZE);
+}
+
+// See Figure 46/47 in Appendix C of the DWARF 4 spec.
+bool DwarfFile::MarkedDwarfFileReader::read_leb128(uint64_t* result, const int8_t check_size, bool is_signed) { + *result = 0; // Ensure a proper result by zeroing it first. + uint8_t buf; + uint8_t shift = 0; + uint8_t bytes_read = 0; + // leb128 is not larger than 8 bytes. + while (bytes_read < 8) { + if (!read_byte(&buf)) { + return false; + } + bytes_read++; + *result |= (buf & 0x7fu) << shift; + shift += 7; + if ((buf & 0x80u) == 0) { + break; + } + } + if (bytes_read > 8 || (check_size != -1 && bytes_read > check_size)) { + // Invalid leb128 encoding or the read leb128 was larger than expected. + return false; + } + + if (is_signed && (shift < 64) && (buf & 0x40u)) { + *result |= static_cast<uint64_t>(-1L) << shift; + } + return true; +} + +bool DwarfFile::MarkedDwarfFileReader::read_uleb128_ignore(const int8_t check_size) { + uint64_t dont_care; + return read_leb128(&dont_care, check_size, false); +} + +bool DwarfFile::MarkedDwarfFileReader::read_uleb128(uint64_t* result, const int8_t check_size) { + return read_leb128(result, check_size, false); +} + +bool DwarfFile::MarkedDwarfFileReader::read_sleb128(int64_t* result, const int8_t check_size) { + return read_leb128((uint64_t*)result, check_size, true); +} + +// If result is a nullptr, we do not care about the content of the string being read. +bool DwarfFile::MarkedDwarfFileReader::read_string(char* result, const size_t result_len) { + uint8_t next_byte; + if (!read_byte(&next_byte)) { + return false; + } + + if (next_byte == 0) { + // Strings must contain at least one non-null byte. + return false; + } + + if (result != NULL) { + if (result_len < 2) { + // Strings must contain at least one non-null byte and a null byte terminator. + return false; + } + result[0] = (char)next_byte; + } + + size_t char_index = 1; + bool exceeded_buffer = false; + while (has_bytes_left()) { + // Read until we find a null byte which terminates the string. 
+ if (!read_byte(&next_byte)) { + return false; + } + + if (result != NULL) { + if (char_index >= result_len) { + // Exceeded buffer size of 'result'. + exceeded_buffer = true; + } else { + result[char_index] = (char)next_byte; + } + char_index++; + } + if (next_byte == 0) { + if (exceeded_buffer) { + result[result_len - 1] = '\0'; // Mark end of string. + DWARF_LOG_ERROR("Tried to read " SIZE_FORMAT " bytes but exceeded buffer size of " SIZE_FORMAT ". Truncating string.", + char_index, result_len); + } + return true; + } + } + return false; +} + #endif // !_WINDOWS && !__APPLE__ diff --git a/hotspot/src/share/vm/utilities/elfFile.hpp b/hotspot/src/share/vm/utilities/elfFile.hpp index 3ce8e92..3277a40 100644 --- a/hotspot/src/share/vm/utilities/elfFile.hpp +++ b/hotspot/src/share/vm/utilities/elfFile.hpp @@ -36,6 +36,27 @@ #ifdef _LP64 +#ifdef ASSERT +// Helper macros to print different log levels during DWARF parsing +#define DWARF_LOG_SUMMARY(format, ...) DWARF_LOG_WITH_LEVEL(1, format, ##__VA_ARGS__) // Same level as error logging +#define DWARF_LOG_ERROR(format, ...) DWARF_LOG_WITH_LEVEL(1, format, ##__VA_ARGS__) +#define DWARF_LOG_INFO(format, ...) DWARF_LOG_WITH_LEVEL(2, format, ##__VA_ARGS__) +#define DWARF_LOG_DEBUG(format, ...) DWARF_LOG_WITH_LEVEL(3, format, ##__VA_ARGS__) +#define DWARF_LOG_TRACE(format, ...) DWARF_LOG_WITH_LEVEL(4, format, ##__VA_ARGS__) + +#define DWARF_LOG_WITH_LEVEL(level, format, ...) \ + if (TraceDwarfLevel >= level) { \ + tty->print("[dwarf] "); \ + tty->print_cr(format, ##__VA_ARGS__); \ + } +#else +#define DWARF_LOG_SUMMARY(format, ...) +#define DWARF_LOG_ERROR(format, ...) +#define DWARF_LOG_INFO(format, ...) +#define DWARF_LOG_DEBUG(format, ...) +#define DWARF_LOG_TRACE(format, ...) 
+#endif + typedef Elf64_Half Elf_Half; typedef Elf64_Word Elf_Word; typedef Elf64_Off Elf_Off; @@ -76,6 +97,29 @@ typedef Elf32_Sym Elf_Sym; class ElfStringTable; class ElfSymbolTable; class ElfFuncDescTable; +class DwarfFile; + +class FileReader : public StackObj { + protected: + FILE* const _fd; + public: + FileReader(FILE* const fd) : _fd(fd) {}; + bool read(void* buf, size_t size); + size_t read_buffer(void* buf, size_t size); + virtual bool set_position(long offset); +}; + +// Mark current position, so we can get back to it after +// reads. +class MarkedFileReader : public FileReader { + protected: + long _marked_pos; + public: + MarkedFileReader(FILE* const fd); + ~MarkedFileReader(); + + bool has_mark() const { return _marked_pos >= 0; } +}; // On Solaris/Linux platforms, libjvm.so does contain all private symbols. @@ -87,6 +131,34 @@ class ElfFuncDescTable; class ElfFile: public CHeapObj<mtInternal> { friend class ElfDecoder; + + protected: + ElfFile* m_next; + + private: + // file + const char* m_filepath; + FILE* m_file; + DwarfFile* m_dwarf_file; + + static const char* USR_LIB_DEBUG_DIRECTORY; + + // Elf header + Elf_Ehdr m_elfHdr; + + // symbol tables + ElfSymbolTable* m_symbol_tables; + + // string tables + ElfStringTable* m_string_tables; + + // function descriptors table + ElfFuncDescTable* m_funcDesc_table; + + NullDecoder::decoder_status m_status; + + ElfStringTable* m_shdr_string_table; + public: ElfFile(const char* filepath); ~ElfFile(); @@ -122,6 +194,9 @@ class ElfFile: public CHeapObj<mtInternal> { // return a string table at specified section index ElfStringTable* get_string_table(int index); + // Get filename and line number information + bool get_source_info(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call); + protected: ElfFile* next() const { return m_next; } void set_next(ElfFile* file) { m_next = file; } @@ -134,27 +209,654 @@ protected: // On systems other than linux it always returns 
false. bool specifies_noexecstack() NOT_LINUX({ return false; }); - protected: - ElfFile* m_next; - private: - // file - const char* m_filepath; - FILE* m_file; + bool create_new_dwarf_file(const char* filepath); - // Elf header - Elf_Ehdr m_elfHdr; + // Struct to store the debug info read from the .gnu_debuglink section. + struct DebugInfo { + static const uint8_t CRC_LEN = 4; - // symbol tables - ElfSymbolTable* m_symbol_tables; + char _dwarf_filename[JVM_MAXPATHLEN]; + uint32_t _crc; + }; - // string tables - ElfStringTable* m_string_tables; + // Helper class to create DWARF paths when loading a DWARF file. + class DwarfFilePath { + private: + static const uint16_t MAX_DWARF_PATH_LENGTH = JVM_MAXPATHLEN; + const char* _filename; + char _path[MAX_DWARF_PATH_LENGTH]; + const uint32_t _crc; + uint16_t _null_terminator_index; // Index for the current null terminator of the string stored in _path - // function descriptors table - ElfFuncDescTable* m_funcDesc_table; + bool check_valid_path() const { + return _path[MAX_DWARF_PATH_LENGTH - 1] == '\0'; + } - NullDecoder::decoder_status m_status; + void update_null_terminator_index() { + _null_terminator_index = strlen(_path); + } + + bool copy_to_path_index(uint16_t index_in_path, const char* src); + + public: + DwarfFilePath(DebugInfo& debug_info) + : _filename(debug_info._dwarf_filename), _crc(debug_info._crc), _null_terminator_index(0) { + _path[MAX_DWARF_PATH_LENGTH - 1] = '\0'; // Ensures to have a null terminated string and not read beyond the buffer limit. 
+ } + + const char* path() const { + return _path; + } + + const char* filename() const { + return _filename; + } + + uint32_t crc() const { + return _crc; + } + + bool set(const char* src); + + bool set_filename_after_last_slash() { + return set_after_last_slash(_filename); + } + + bool set_after_last_slash(const char* src); + bool append(const char* src); + }; + + // Load the DWARF file (.debuginfo) that belongs to this file either from (checked in listed order): + // - Same directory as the library file. + // - User defined path in environmental variable _JVM_DWARF_PATH. + // - Subdirectory .debug in same directory as the library file. + // - /usr/lib/debug directory + bool load_dwarf_file(); + + + bool read_debug_info(DebugInfo* debug_info) const; + + bool load_dwarf_file_from_same_directory(DwarfFilePath& dwarf_file_path); + bool load_dwarf_file_from_env_var_path(DwarfFilePath& dwarf_file_path); + bool load_dwarf_file_from_env_path_folder(DwarfFilePath& dwarf_file_path, const char* dwarf_path_from_env, const char* folder); + bool load_dwarf_file_from_debug_sub_directory(DwarfFilePath& dwarf_file_path); + bool load_dwarf_file_from_usr_lib_debug(DwarfFilePath& dwarf_file_path); + bool open_valid_debuginfo_file(const DwarfFilePath& dwarf_file_path); + static uint32_t get_file_crc(FILE* const file); + static uint gnu_debuglink_crc32(uint32_t crc, uint8_t* buf, size_t len); + + protected: + FILE* const fd() const { return m_file; } + + // Read the section header of section 'name'. + bool read_section_header(const char* name, Elf_Shdr& hdr) const; + bool is_valid_dwarf_file() const; +}; + +/* + * This class parses and reads filename and line number information from an associated .debuginfo file that belongs to + * this ELF file or directly from this ELF file if there is no separate .debuginfo file. The debug info is written by GCC + * in DWARF - a standardized debugging data format. There are special sections where the DWARF info is written to. 
These + * sections can either be put into the same ELF file or a separate .debuginfo file. For simplicity, when referring to the + * "DWARF file" or the ".debuginfo file" we just mean the file that contains the required DWARF sections. The current version + * of GCC uses DWARF version 4 as default which is defined in the official standard: http://www.dwarfstd.org/doc/DWARF4.pdf. + * This class is able to parse 32-bit DWARF version 4 for 32 and 64-bit Linux builds. GCC does not emit 64-bit DWARF, which is + * therefore not supported by this parser. For some reason, GCC emits DWARF version 3 for the .debug_line section as a + * default. This parser was therefore adapted to support DWARF version 3 and 4 for the .debug_line section. Apart from that, + * other DWARF versions, especially the newest version 5, are not (yet) supported. + * + * Description of used DWARF file sections: + * - .debug_aranges: A table that consists of sets of variable length entries, each set describing the portion of the + * program's address space that is covered by a single compilation unit. In other words, the entries + * describe a mapping between addresses and compilation units. + * - .debug_info: The core DWARF data containing DWARF Information Entries (DIEs). Each DIE consists of a tag and a + * series of attributes. Each (normal) compilation unit is represented by a DIE with the tag + * DW_TAG_compile_unit and contains children. For our purposes, we are only interested in this DIE to + * get to the .debug_line section. We do not care about the children. This parser currently only + * supports normal compilation units and no partial compilation or type units. + * - .debug_abbrev: Represents abbreviation tables for all compilation units. A table for a specific compilation unit + * consists of a series of abbreviation declarations. Each declaration specifies a tag and attributes + * for a DIE.
The DIEs from the compilation units in the .debug_info section need the abbreviation table + * to decode their attributes (their meaning and size). + * - .debug_line: Contains filename and line number information for each compilation unit. To get the information, a + * state machine needs to be executed which generates a matrix. Each row of this matrix describes the + * filename and line number (among other information) for a specific offset in the associated ELF library + * file. The state machine is executed until the row for the requested offset is found. The filename and + * line number information can then be fetched with the current register values of the state machine. + * + * Algorithm + * --------- + * Given: Offset into the ELF file library. + * Return: Filename and line number for this offset. + * (1) First, the path to the .debuginfo DWARF file is found by inspecting the .gnu_debuglink section of the library file. + * The DWARF file is then opened by calling the constructor of this class. Once this is done, the processing of the + * DWARF file is initiated by calling find_filename_and_line_number(). + * (2) Find the compilation unit offset by reading entries from the section .debug_aranges, which contain address range + * descriptors, until we find the correct descriptor that includes the library offset. + * (3) Find the .debug_line offset for the line number information program from the .debug_info section: + * (a) Parse the compilation unit header from the .debug_info section at the offset obtained by (2). + * (b) Read the debug_abbrev_offset into the .debug_abbrev section that belongs to this compilation unit from the + * header obtained in (3a). + * (c) Read the abbreviation code that immediately follows the compilation unit header from (3a) which is needed to + * find the correct entry in the .debug_abbrev section. 
+ * (d) Find the correct entry in the abbreviation table in the .debug_abbrev section by starting to parse entries at + * the debug_abbrev_offset from (3b) until we find the correct one matching the abbreviation code from (3c). + * (e) Read the specified attributes of the abbreviation entry from (3d) from the compilation unit (in the .debug_info + * section) until we find the attribute DW_AT_stmt_list. This attribute represents an offset into the .debug_line + * section which contains the line number program information to get the filename and the line number. + * (4) Find the filename and line number belonging to the given library offset by running the line number program state + * machine with its registers. This creates a matrix where each row stores information for specific addresses (library + * offsets). The state machine executes different opcodes which modify the state machine registers. Certain opcodes + * will add a new row to the matrix by taking the current values of state machine registers. As soon as the correct + * matrix row matching the library offset is found, we can read the line number from the line register of the state + * machine and parse the filename from the line number program header with the given file index from the file register + * of the state machine. + * + * More details about the different phases can be found at the associated classes and methods. A visualization of the + * algorithm inside the different sections can be found in the class comments for DebugAranges, DebugAbbrev and + * LineNumberProgram further down in this file. + * + * Available (develop) log levels (-XX:TraceDwarfLevel=[1,4]) which are only present in debug builds. Each level prints + * all the logs of the previous levels and adds some more fine-grained logging: + * - Level 1 (summary + errors): + * - Prints the path of the parsed DWARF file together with the resulting source information. + * - Prints all errors.
+ * - Level 2 (info): + * - Prints the found offsets of all DWARF sections + * - Level 3 (debug): + * - Prints the results of the steps (1) - (4) together with the generated line information matrix. + * - Level 4 (trace): + * - Complete information about intermediate states/results when parsing the DWARF file. + */ +class DwarfFile : public ElfFile { + + static const uint8_t ADDRESS_SIZE = NOT_LP64(4) LP64_ONLY(8); + // We only support 32-bit DWARF (emitted by GCC) which uses 32-bit values for DWARF section lengths and offsets + // relative to the beginning of a section. + static const uint8_t DWARF_SECTION_OFFSET_SIZE = 4; + + class MarkedDwarfFileReader : public MarkedFileReader { + private: + long _current_pos; + long _max_pos; // Used to guarantee that we stop reading in case we reached the end of a section. + + bool read_leb128(uint64_t* result, int8_t check_size, bool is_signed); + public: + MarkedDwarfFileReader(FILE* const fd) : MarkedFileReader(fd), _current_pos(-1), _max_pos(-1) {} + + virtual bool set_position(long new_pos); + long get_position() const { return _current_pos; } + void set_max_pos(long max_pos) { _max_pos = max_pos; } + // Have we reached the limit of maximally allowable bytes to read? Used to ensure to stop reading when a section ends. + bool has_bytes_left() const; + // Call this if another file reader has changed the position of the same file handle. + bool update_to_stored_position(); + // Must be called to restore the old position before this file reader changed it with update_to_stored_position(). 
+ bool reset_to_previous_position(); + bool move_position(long offset); + bool read_sbyte(int8_t* result); + bool read_byte(uint8_t* result); + bool read_word(uint16_t* result); + bool read_dword(uint32_t* result); + bool read_qword(uint64_t* result); + bool read_uleb128_ignore(int8_t check_size = -1); + bool read_uleb128(uint64_t* result, int8_t check_size = -1); + bool read_sleb128(int64_t* result, int8_t check_size = -1); + // Reads 4 bytes for 32-bit and 8 bytes for 64-bit builds. + bool read_address_sized(uintptr_t* result); + bool read_string(char* result = NULL, size_t result_len = 0); + }; + + // (2) Processing the .debug_aranges section to find the compilation unit which covers offset_in_library. + // This is specified in section 6.1.2 of the DWARF 4 spec. + // + // Structure of .debug_aranges: + // Section Header + // % Table of variable length sets describing the address space covered by a compilation unit + // % Set 1 + // ... + // % Set i: + // % Set header + // ... + // debug_info_offset -> offset to compilation unit + // % Series of address range descriptors [beginning_address, range_length]: + // % Descriptor 1 + // ... + // % Descriptor j: + // beginning_address <= offset_in_library < beginning_address + range_length? + // => Found the correct set covering offset_in_library. Take debug_info_offset from the set header to get + // to the correct compilation unit in .debug_info. + class DebugAranges { + + // The header is defined in section 6.1.2 of the DWARF 4 spec. + struct DebugArangesSetHeader { + // The total length of all of the entries for that set, not including the length field itself. + uint32_t _unit_length; + + // This number is specific to the address lookup table and is independent of the DWARF version number. + uint16_t _version; + + // The offset from the beginning of the .debug_info or .debug_types section of the compilation unit header referenced + // by the set. In this parser we only use it as offset into .debug_info. 
This must be 4 bytes for 32-bit DWARF. + uint32_t _debug_info_offset; + + // The size of an address in bytes on the target architecture, 4 bytes for 32-bit and 8 bytes for 64-bit Linux builds. + uint8_t _address_size; + + // The size of a segment selector in bytes on the target architecture. This should be 0. + uint8_t _segment_size; + }; + + // Address descriptor defining a range that is covered by a compilation unit. It is defined in section 6.1.2 after + // the set header in the DWARF 4 spec. + struct AddressDescriptor { + uintptr_t beginning_address; + uintptr_t range_length; + }; + + DwarfFile* _dwarf_file; + MarkedDwarfFileReader _reader; + uint32_t _section_start_address; + + bool read_section_header(); + bool read_set_header(DebugArangesSetHeader& header); + bool read_address_descriptors(uint32_t offset_in_library, bool& found_matching_set); + bool read_address_descriptor(AddressDescriptor& descriptor); + static bool does_match_offset(uint32_t offset_in_library, const AddressDescriptor& descriptor) ; + static bool is_terminating_entry(const AddressDescriptor& descriptor); + public: + DebugAranges(DwarfFile* dwarf_file) : _dwarf_file(dwarf_file), _reader(dwarf_file->fd()), _section_start_address(0) {} + bool find_compilation_unit_offset(uint32_t offset_in_library, uint32_t* compilation_unit_offset); + + }; + + // (3a-c,e) The compilation unit is read from the .debug_info section. The structure of .debug_info is shown in the + // comments of class DebugAbbrev. + class CompilationUnit { + + // Attribute form encodings from Figure 21 in section 7.5 of the DWARF 4 spec. 
+ static const uint8_t DW_FORM_addr = 0x01; // address + static const uint8_t DW_FORM_block2 = 0x03; // block + static const uint8_t DW_FORM_block4 = 0x04; // block + static const uint8_t DW_FORM_data2 = 0x05; // constant + static const uint8_t DW_FORM_data4 = 0x06; // constant + static const uint8_t DW_FORM_data8 = 0x07; // constant + static const uint8_t DW_FORM_string = 0x08; // string + static const uint8_t DW_FORM_block = 0x09; // block + static const uint8_t DW_FORM_block1 = 0x0a; // block + static const uint8_t DW_FORM_data1 = 0x0b; // constant + static const uint8_t DW_FORM_flag = 0x0c; // flag + static const uint8_t DW_FORM_sdata = 0x0d; // constant + static const uint8_t DW_FORM_strp = 0x0e; // string + static const uint8_t DW_FORM_udata = 0x0f; // constant + static const uint8_t DW_FORM_ref_addr = 0x10; // reference0; + static const uint8_t DW_FORM_ref1 = 0x11; // reference + static const uint8_t DW_FORM_ref2 = 0x12; // reference + static const uint8_t DW_FORM_ref4 = 0x13; // reference + static const uint8_t DW_FORM_ref8 = 0x14; // reference + static const uint8_t DW_FORM_ref_udata = 0x15; // reference + static const uint8_t DW_FORM_indirect = 0x16; // see Section 7.5.3 + static const uint8_t DW_FORM_sec_offset = 0x17; // lineptr, loclistptr, macptr, rangelistptr + static const uint8_t DW_FORM_exprloc = 0x18; // exprloc + static const uint8_t DW_FORM_flag_present = 0x19; // flag + static const uint8_t DW_FORM_ref_sig8 = 0x20; // reference + + // The header is defined in section 7.5.1.1 of the DWARF 4 spec. + struct CompilationUnitHeader { + // The length of the .debug_info contribution for that compilation unit, not including the length field itself. + uint32_t _unit_length; + + // The version of the DWARF information for the compilation unit. The value in this field is 4 for DWARF 4. + uint16_t _version; + + // The offset into the .debug_abbrev section. 
This offset associates the compilation unit with a particular set of + // debugging information entry abbreviations. + uint32_t _debug_abbrev_offset; + + // The size in bytes of an address on the target architecture, 4 bytes for 32-bit and 8 bytes for 64-bit Linux builds. + uint8_t _address_size; + }; + + DwarfFile* _dwarf_file; + MarkedDwarfFileReader _reader; + CompilationUnitHeader _header; + const uint32_t _compilation_unit_offset; + + // Result of a request initiated by find_debug_line_offset(). + uint32_t _debug_line_offset; + + bool read_header(); + public: + CompilationUnit(DwarfFile* dwarf_file, uint32_t compilation_unit_offset) + : _dwarf_file(dwarf_file), _reader(dwarf_file->fd()), _compilation_unit_offset(compilation_unit_offset), _debug_line_offset(0) {} + + bool find_debug_line_offset(uint32_t* debug_line_offset); + bool read_attribute_value(uint64_t attribute_form, bool is_DW_AT_stmt_list_attribute); + }; + + // (3d) Read from the .debug_abbrev section at the debug_abbrev_offset specified by the compilation unit header. + // + // The interplay between the .debug_info and .debug_abbrev sections is more complex. The following visualization of the structure + // of both sections support the comments found in the parsing steps of the CompilationUnit and DebugAbbrev class. + // + // Structure of .debug_abbrev: + // Section Header + // % Series of abbreviation tables + // % Abbreviation table 1 + // ... + // % Abbreviation table for compilation unit at debug_abbrev_offset: + // % Series of declarations: + // % Declaration 1: + // abbreviation code + // tag + // DW_CHILDREN_yes/no + // % Series of attribute specifications + // % Attribute specification 1: + // attribute name + // attribute form + // ... + // % Last attribute specification: + // 0 + // 0 + // ... 
+ // % Declaration i: + // Abbrev code read from compilation unit [AC] + // DW_TAG_compile_unit + // DW_CHILDREN_yes + // % Series of attribute specifications + // % Attribute specification 1 [AS1] + // ... + // % Attribute specification j [ASj]: + // DW_AT_stmt_list + // DW_FORM_sec_offset + // + // + // Structure of .debug_info: + // Section Header + // % Series of compilation units + // % Compilation unit 1 + // ... + // % Compilation unit i for library offset fetched from .debug_aranges: + // % Compilation unit header: + // ... + // debug_abbrev_offset -> offset for abbreviation table in .debug_abbrev for this compilation unit + // ... + // Abbrev code -> used in .debug_abbrev to find the correct declaration [AC] + // % Series of attribute values + // Attribute value 1 (in the format defined by attribute specification 1 [AS1]) + // ... + // Attribute value j (in the format defined by attribute specification j [ASj]): + // => Specifies Offset to line number program for this compilation unit in .debug_line + class DebugAbbrev { + + struct AbbreviationDeclaration { + uint64_t _abbrev_code; + uint64_t _tag; + uint8_t _has_children; + }; + + struct AttributeSpecification { + uint64_t _name; + uint64_t _form; + }; + + // Tag encoding from Figure 18 in section 7.5 of the DWARF 4 spec. + static const uint8_t DW_TAG_compile_unit = 0x11; + + // Child determination encoding from Figure 19 in section 7.5 of the DWARF 4 spec. + static const uint8_t DW_CHILDREN_yes = 0x01; + + // Attribute encoding from Figure 20 in section 7.5 of the DWARF 4 spec. + static const uint8_t DW_AT_stmt_list = 0x10; + + /* There is no specific header for this section */ + + DwarfFile* _dwarf_file; + MarkedDwarfFileReader _reader; + CompilationUnit* _compilation_unit; // Need to read from compilation unit while parsing the entries in .debug_abbrev. 
+ + // Result field of a request + uint32_t* _debug_line_offset; + + bool read_declaration(AbbreviationDeclaration& declaration); + static bool is_wrong_or_unsupported_format(const AbbreviationDeclaration& declaration); + bool read_attribute_specifications(bool is_DW_TAG_compile_unit); + bool read_attribute_specification(AttributeSpecification& specification); + static bool is_terminating_specification(const AttributeSpecification& attribute_specification) ; + + public: + DebugAbbrev(DwarfFile* dwarf_file, CompilationUnit* compilation_unit) : + _dwarf_file(dwarf_file), _reader(_dwarf_file->fd()), _compilation_unit(compilation_unit), + _debug_line_offset(NULL) {} + + bool read_section_header(uint32_t debug_abbrev_offset); + bool find_debug_line_offset(uint64_t abbrev_code); + }; + + // (4) The line number program for the compilation unit at the offset of the .debug_line obtained by (3). + // For some reason, earlier GCC versions emit the line number program in DWARF 2 or 3 format even though the + // default is DWARF 4. It also mixes the standards (see comments in the parsing code). + // + // Therefore, this class supports DWARF 2, 3 and 4 parsing as specified in section 6.2 of the DWARF specs. + // The parsing of DWARF 2 is already covered by the parsing of DWARF 3 as they use the shared opcodes in the same way. + // The parsing of DWARF 4, however, needs some adaptation as it consumes more data for some shared opcodes. + // + // DWARF 2 standard: https://dwarfstd.org/doc/dwarf-2.0.0.pdf + // DWARF 3 standard: https://dwarfstd.org/doc/Dwarf3.pdf + // + // + // Structure of .debug_line: + // Section Header + // % Series of line number program entries for each compilation unit + // % Line number program 1 + // ... + // % Line number program i for our compilation unit: + // % Line number program header: + // ... + // version -> currently emits version 3 by default + // ...
+ // file_name -> sequence of file names + // % Sequence of opcodes as part of the line number program to build the line number information matrix: + // % Format of matrix: [offset, line, directory_index, file_index] + // % Line 1 + // ... + // % Line j: + // [offset matching offset_in_library, line, directory_index, file_index] + // => Get line number + look up file_index in file_name list (pick file_index'th string) + class LineNumberProgram { + + // Standard opcodes for the line number program defined in section 6.2.5.2 of the DWARF 4 spec. + static const uint8_t DW_LNS_copy = 1; + static const uint8_t DW_LNS_advance_pc = 2; + static const uint8_t DW_LNS_advance_line = 3; + static const uint8_t DW_LNS_set_file = 4; + static const uint8_t DW_LNS_set_column = 5; + static const uint8_t DW_LNS_negate_stmt = 6; + static const uint8_t DW_LNS_set_basic_block = 7; + static const uint8_t DW_LNS_const_add_pc = 8; + static const uint8_t DW_LNS_fixed_advance_pc = 9; + static const uint8_t DW_LNS_set_prologue_end = 10; // Introduced with DWARF 3 + static const uint8_t DW_LNS_set_epilogue_begin = 11; // Introduced with DWARF 3 + static const uint8_t DW_LNS_set_isa = 12; // Introduced with DWARF 3 + + // Extended opcodes for the line number program defined in section 6.2.5.2 of the DWARF 4 spec. + static const uint8_t DW_LNE_end_sequence = 1; + static const uint8_t DW_LNE_set_address = 2; + static const uint8_t DW_LNE_define_file = 3; + static const uint8_t DW_LNE_set_discriminator = 4; // Introduced with DWARF 4 + + // The header is defined in section 6.2.4 of the DWARF 4 spec. + struct LineNumberProgramHeader { + // The size in bytes of the line number information for this compilation unit, not including the unit_length + // field itself. 32-bit DWARF uses 4 bytes. + uint32_t _unit_length; + + // The version of the DWARF information for the line number program unit. The value in this field should be 4 for + // DWARF 4 and version 3 as used for DWARF 3. 
+ uint16_t _version; + + // The number of bytes following the header_length field to the beginning of the first byte of the line number + // program itself. 32-bit DWARF uses 4 bytes. + uint32_t _header_length; + + // The size in bytes of the smallest target machine instruction. Line number program opcodes that alter the address + // and op_index registers use this and maximum_operations_per_instruction in their calculations. + uint8_t _minimum_instruction_length; + + // The maximum number of individual operations that may be encoded in an instruction. Line number program opcodes + // that alter the address and op_index registers use this and minimum_instruction_length in their calculations. + // For non-VLIW architectures, this field is 1, the op_index register is always 0, and the operation pointer is + // simply the address register. This is only used with DWARF 4. + uint8_t _maximum_operations_per_instruction; + + // The initial value of the is_stmt register. + uint8_t _default_is_stmt; + + // This parameter affects the meaning of the special opcodes. + int8_t _line_base; + + // This parameter affects the meaning of the special opcodes. + uint8_t _line_range; + + // The number assigned to the first special opcode. + uint8_t _opcode_base; + + // This array specifies the number of LEB128 operands for each of the standard opcodes. The first element of the + // array corresponds to the opcode whose value is 1, and the last element corresponds to the opcode whose value is + // opcode_base-1. DWARF 2 uses 9 standard opcodes while DWARF 3 and 4 use 12. + uint8_t _standard_opcode_lengths[12]; + + /* + * The following fields are not part of the real header and are only used for the implementation. + */ + // Offset where the filename strings are starting in header. + long _file_names_offset; + + // _header_length only specifies the number of bytes following the _header_length field. It does not include + // the size of _unit_length, _version and _header_length itself. 
This constant represents the number of missing + // bytes to get the real size of the header: + // sizeof(_unit_length) + sizeof(_version) + sizeof(_header_length) = 4 + 2 + 4 = 10 + static const uint8_t HEADER_DESCRIPTION_BYTES = 10; + }; + + // The line number program state consists of several registers that hold the current state of the line number program + // state machine. The state/different state registers are defined in section 6.2.2 of the DWARF 4 spec. Most of these + // fields (state registers) are not used to get the filename and the line number information. + struct LineNumberProgramState : public CHeapObj<mtInternal> { + // The program-counter value corresponding to a machine instruction generated by the compiler. + // 4 bytes on 32-bit and 8 bytes on 64-bit. + uintptr_t _address; + + // The index of an operation within a VLIW instruction. The index of the first operation is 0. For non-VLIW + // architectures, this register will always be 0. + // The address and op_index registers, taken together, form an operation pointer that can reference any + // individual operation with the instruction stream. This field was introduced with DWARF 4. + uint32_t _op_index; + + // The identity of the source file corresponding to a machine instruction. + uint32_t _file; + + // A source line number. Lines are numbered beginning at 1. The compiler may emit the value 0 in cases where an + // instruction cannot be attributed to any source line. + uint32_t _line; + + // A column number within a source line. Columns are numbered beginning at 1. The value 0 is reserved to indicate + // that a statement begins at the “left edge” of the line. + uint32_t _column; + + // Indicates that the current instruction is a recommended breakpoint location. + bool _is_stmt; + + // Indicates that the current instruction is the beginning of a basic block. 
+ bool _basic_block; + + // Indicates that the current address is that of the first byte after the end of a sequence of target machine + // instructions. end_sequence terminates a sequence of lines. + bool _end_sequence; + + // Indicates that the current address is one (of possibly many) where execution should be suspended for an entry + // breakpoint of a function. This field was introduced with DWARF 3. + bool _prologue_end; + + // Indicates that the current address is one (of possibly many) where execution should be suspended for an exit + // breakpoint of a function. This field was introduced with DWARF 3. + bool _epilogue_begin; + + // Encodes the applicable instruction set architecture for the current instruction. + // This field was introduced with DWARF 3. + uint32_t _isa; + + // Identifies the block to which the current instruction belongs. This field was introduced with DWARF 4. + uint32_t _discriminator; + + /* + * Additional fields which are not part of the actual state as described in DWARF spec. + */ + // Header fields + // Specifies which DWARF version is used in the .debug_line section. Supported version: DWARF 2, 3, and 4. + const uint16_t _dwarf_version; + const bool _initial_is_stmt; + + // Implementation specific fields + bool _append_row; + bool _do_reset; + bool _first_entry_in_sequence; + bool _can_sequence_match_offset; + bool _found_match; + + LineNumberProgramState(const LineNumberProgramHeader& header) + : _is_stmt(header._default_is_stmt != 0), _dwarf_version(header._version), + _initial_is_stmt(header._default_is_stmt != 0), _found_match(false) { + reset_fields(); + } + + void reset_fields(); + // Defined in section 6.2.5.1 of the DWARF spec 4. add_to_address_register() must always be executed before set_index_register. 
+ void add_to_address_register(uint32_t operation_advance, const LineNumberProgramHeader& header); + void set_index_register(uint32_t operation_advance, const LineNumberProgramHeader& header); + }; + + DwarfFile* _dwarf_file; + MarkedDwarfFileReader _reader; + LineNumberProgramHeader _header; + LineNumberProgramState* _state; + const uint32_t _offset_in_library; + const uint64_t _debug_line_offset; + bool _is_pc_after_call; + + bool read_header(); + bool run_line_number_program(char* filename, size_t filename_len, int* line); + bool apply_opcode(); + bool apply_extended_opcode(); + bool apply_standard_opcode(uint8_t opcode); + void apply_special_opcode(const uint8_t opcode); + bool does_offset_match_entry(uintptr_t previous_address, uint32_t previous_file, uint32_t previous_line); + void print_and_store_prev_entry(uint32_t previous_file, uint32_t previous_line); + bool get_filename_from_header(uint32_t file_index, char* filename, size_t filename_len); + + public: + LineNumberProgram(DwarfFile* dwarf_file, uint32_t offset_in_library, uint64_t debug_line_offset, bool is_pc_after_call) + : _dwarf_file(dwarf_file), _reader(dwarf_file->fd()), _offset_in_library(offset_in_library), + _debug_line_offset(debug_line_offset), _is_pc_after_call(is_pc_after_call) {} + + bool find_filename_and_line_number(char* filename, size_t filename_len, int* line); + }; + + public: + DwarfFile(const char* filepath) : ElfFile(filepath) {} + + /* + * Starting point of reading line number and filename information from the DWARF file. + * + * Given: Offset into the ELF library file, a filename buffer of size filename_len, a line number pointer. + * Return: True: The filename is set in the 'filename' buffer and the line number at the address pointed to by 'line'. + * False: Something went wrong either while reading from the file or during parsing due to an unexpected format. + * This could happen if the DWARF file is in an unsupported or wrong format. 
+ * + * More details about the different phases can be found at the associated methods. + */ + bool get_filename_and_line_number(uint32_t offset_in_library, char* filename, size_t filename_len, int* line, bool is_pc_after_call); }; #endif // !_WINDOWS && !__APPLE__ diff --git a/hotspot/src/share/vm/utilities/nativeCallStack.cpp b/hotspot/src/share/vm/utilities/nativeCallStack.cpp index ee6eb31..6b18b49 100644 --- a/hotspot/src/share/vm/utilities/nativeCallStack.cpp +++ b/hotspot/src/share/vm/utilities/nativeCallStack.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "runtime/os.hpp" +#include "utilities/decoder.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/nativeCallStack.hpp" @@ -93,6 +94,8 @@ void NativeCallStack::print_on(outputStream* out, int indent) const { address pc; char buf[1024]; int offset; + int line_no; + if (is_empty()) { for (int index = 0; index < indent; index ++) out->print(" "); #if PLATFORM_NATIVE_STACK_WALKING_SUPPORTED @@ -107,10 +110,14 @@ void NativeCallStack::print_on(outputStream* out, int indent) const { // Print indent for (int index = 0; index < indent; index ++) out->print(" "); if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) { - out->print_cr("[" PTR_FORMAT "] %s+0x%x", p2i(pc), buf, offset); + out->print("[" PTR_FORMAT "] %s+0x%x", p2i(pc), buf, offset); } else { - out->print_cr("[" PTR_FORMAT "]", p2i(pc)); + out->print("[" PTR_FORMAT "]", p2i(pc)); + } + if (Decoder::get_source_info(pc, buf, sizeof(buf), &line_no, frame != 0)) { + out->print(" (%s:%d)", buf, line_no); } + out->cr(); } } } diff --git a/hotspot/src/share/vm/utilities/vmError.cpp b/hotspot/src/share/vm/utilities/vmError.cpp index 261591d..26408fa 100644 --- a/hotspot/src/share/vm/utilities/vmError.cpp +++ b/hotspot/src/share/vm/utilities/vmError.cpp @@ -1209,3 +1209,9 @@ void VMError::report_java_out_of_memory() { VMThread::execute(&op); } } + +// Returns true if the current thread reported a fatal error. 
+bool VMError::is_error_reported_in_current_thread() { + return first_error_tid == os::current_thread_id(); +} + diff --git a/hotspot/src/share/vm/utilities/vmError.hpp b/hotspot/src/share/vm/utilities/vmError.hpp index 299cfaa..21db84d 100644 --- a/hotspot/src/share/vm/utilities/vmError.hpp +++ b/hotspot/src/share/vm/utilities/vmError.hpp @@ -140,6 +140,9 @@ public: static jlong get_first_error_tid() { return first_error_tid; } + + // Returns true if the current thread reported a fatal error. + static bool is_error_reported_in_current_thread(); }; #endif // SHARE_VM_UTILITIES_VMERROR_HPP diff --git a/jdk/test/jdk/java/dwarf/TestDwarf.java b/jdk/test/jdk/java/dwarf/TestDwarf.java new file mode 100644 index 0000000..8e41a28 --- /dev/null +++ b/jdk/test/jdk/java/dwarf/TestDwarf.java @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8242181 + * @library ../../../lib/ ../../../lib/testlibrary + * @summary Test DWARF parser with various crashes if debug symbols are available. If the libjvm debug symbols are not + * in the same directory as the libjvm.so file, in a subdirectory called .debug, or in the path specified + * by the environment variable _JVM_DWARF_PATH, then no verification of the hs_err_file is done for libjvm.so. + * @requires vm.compMode != "Xint" & os.family == "linux" + * @run main/othervm TestDwarf + */ + +import jdk.test.lib.Asserts; +import jdk.test.lib.Platform; +import jdk.testlibrary.OutputAnalyzer; +import jdk.testlibrary.ProcessTools; + +import sun.misc.Unsafe; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class TestDwarf { + public static void main(String[] args) throws Throwable { + if (args.length != 0) { + switch (args[0]) { + case "outOfMemory" : + crashOutOfMemory(); + Asserts.fail("Should crash in crashOutOfMemory()"); + case "abortVMOnException" : + crashAbortVmOnException(); + Asserts.fail("Should crash in crashAbortVmOnException()"); + } + } else { + try { + test(); + } catch (UnsupportedDwarfVersionException e) { + System.out.println("Skip test due to a DWARF section that is in an unsupported version by the parser."); + } + } + } + + // Crash the VM in different ways in order to verify that DWARF parsing is able to print the source information + // in the hs_err_files for each VM and C stack frame. 
+ private static void test() throws Throwable { + runAndCheck(new Flags("-Xcomp", "-XX:CICrashAt=1", "-version")); + runAndCheck(new Flags("-Xmx100M", "-XX:ErrorHandlerTest=15", "-version")); + runAndCheck(new Flags("-XX:+CrashGCForDumpingJavaThread", "-version")); + runAndCheck(new Flags("-Xmx10m", "-XX:+CrashOnOutOfMemoryError", TestDwarf.class.getCanonicalName(), "outOfMemory")); + // Use -XX:-TieredCompilation as C1 is currently not aborting the VM (JDK-8264899). + runAndCheck(new Flags("-XX:-TieredCompilation", "-XX:+UnlockDiagnosticVMOptions", "-XX:AbortVMOnException=MyException", + TestDwarf.class.getCanonicalName(), "abortVMOnException")); + if (Platform.isX64() || Platform.isX86()) { + // Not all platforms raise SIGFPE but x86_32 and x86_64 do. + } + } + + private static void runAndCheck(Flags flags, DwarfConstraint... constraints) throws Throwable { + OutputAnalyzer crashOut; + ProcessBuilder pb; + int flag_size = flags.getFlags().size(); + pb = ProcessTools.createJavaProcessBuilder(flags.getFlags().toArray(new String[flag_size])); + crashOut = ProcessTools.executeProcess(pb); + String crashOutputString = crashOut.getOutput(); + Asserts.assertNotEquals(crashOut.getExitValue(), 0, "Crash JVM should not exit gracefully"); + Pattern pattern = Pattern.compile("hs_err_pid[0-9]*.log"); + Matcher matcher = pattern.matcher(crashOutputString); + System.out.println(crashOutputString); + if (matcher.find()) { + String hsErrFileName = matcher.group(); + System.out.println("hs_err_file: " + hsErrFileName); + File hs_err_file = new File(hsErrFileName); + BufferedReader reader = new BufferedReader(new FileReader(hs_err_file)); + String line; + boolean foundNativeFrames = false; + int matches = 0; + int frameIdx = 0; + // Check all stack entries after the line starting with "Native frames" in the hs_err_file until an empty line + // is found which denotes the end of the stack frames. 
+ while ((line = reader.readLine()) != null) { + if (foundNativeFrames) { + if (line.isEmpty()) { + // Done with the entire stack. + break; + } else if ((line.startsWith("C") || line.startsWith("V"))) { + // Could be VM or native C frame. There are usually no symbols available for libpthread.so. + matches++; + // File and library names are non-empty and may contain English letters, underscores, dots or numbers ([a-zA-Z0-9_.]+). + // Line numbers have at least one digit and start with non-zero ([1-9][0-9]*). + pattern = Pattern.compile("[CV][\\s\\t]+\\[([a-zA-Z0-9_.]+)\\+0x.+][\\s\\t]+.*\\+0x.+[\\s\\t]+\\([a-zA-Z0-9_.]+\\.[a-z]+:[1-9][0-9]*\\)"); + matcher = pattern.matcher(line); + if (!matcher.find()) { + checkNoSourceLine(crashOutputString, line); + } + + // Check additional DWARF constraints + if (constraints != null) { + int finalFrameIdx = frameIdx; + String finalLine = line; + Arrays.stream(constraints).forEach(c -> c.checkConstraint(finalFrameIdx, finalLine)); + } + } + frameIdx++; + } else if (line.startsWith("Native frames")) { + // Stack starts after this line. + foundNativeFrames = true; + } + } + Asserts.assertGreaterThan(matches, 0, "Could not find any stack frames"); + } else { + throw new RuntimeException("Could not find an hs_err_file"); + } + } + + /** + * There are some valid cases where we cannot find source information. Check these. + */ + private static void checkNoSourceLine(String crashOutputString, String line) { + Pattern pattern = Pattern.compile("[CV][\\s\\t]+\\[([a-zA-Z0-9_.]+)\\+0x.+][\\s\\t]+.*\\+0x"); + Matcher matcher = pattern.matcher(line); + Asserts.assertTrue(matcher.find(), "Must find library in \"" + line + "\""); + // Check if there are symbols available for library. If not, then we cannot find any source information for this library. + // This can happen if this test is run without any JDK debug symbols at all but also for some libraries like libpthread.so + // which usually has no symbols available. 
+ String library = matcher.group(1); + pattern = Pattern.compile("Failed to load DWARF file for library.*" + library + ".*or find DWARF sections directly inside it"); + matcher = pattern.matcher(crashOutputString); + if (!matcher.find()) { + bailoutIfUnsupportedDwarfVersion(crashOutputString); + throw new RuntimeException("Could not find filename or line number in \"" + line + "\""); + } + // We should always find symbols for libTestDwarf.so. + Asserts.assertFalse(library.equals("libTestDwarf.so"), "Could not find filename or line number in \"" + line + "\" for libTestDwarf.so"); + System.out.println("Did not find symbols for " + library + ". If they are not in the same directory as " + library + " consider setting " + + "the environmental variable _JVM_DWARF_PATH to point to the debug symbols directory."); + } + + /** + * Some older GCC versions might emit DWARF sections in an old format that is not supported by the DWARF parser. + * If this is the case, skip this entire test by throwing UnsupportedDwarfVersionException. + */ + private static void bailoutIfUnsupportedDwarfVersion(String crashOutputString) { + Pattern pattern = Pattern.compile(".debug_\\S+ in unsupported DWARF version \\d+"); + Matcher matcher = pattern.matcher(crashOutputString); + if (matcher.find()) { + throw new UnsupportedDwarfVersionException(); + } + } + + // Crash with SIGSEGV. + private static void crashUnsafeAccess() throws Exception { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + Unsafe unsafe = (Unsafe)f.get(null); + unsafe.putAddress(0, 0); // Crash + } + + // Crash with Internal Error: Java heap space. + private static void crashOutOfMemory() { + Object[] o = null; + + // Loop endlessly and consume memory until we run out. Will crash due to -XX:+CrashOnOutOfMemoryError. + while (true) { + o = new Object[] {o}; + } + } + + // Crash with Internal Error: Saw java.lang.RuntimeException, aborting. 
+ // Crash happens due to an exception raised in combination with -XX:AbortVMOnException. + private static void crashAbortVmOnException() { + throw new MyException(); + } +} + +class UnsupportedDwarfVersionException extends RuntimeException { } + +class MyException extends RuntimeException { } + +class Flags { + private final List<String> listOfOptions = new ArrayList<>(); + + Flags(String... flags) { + listOfOptions.add("-XX:TraceDwarfLevel=2"); // Always add debug flag + listOfOptions.addAll(Arrays.asList(flags)); + } + + public List<String> getFlags() { + return listOfOptions; + } + +} +class DwarfConstraint { + private final int frameIdx; + private final String methodName; + private final String dwarfInfo; + + DwarfConstraint(int frameIdx, String methodName, String fileName, int lineNo) { + this.frameIdx = frameIdx; + this.methodName = methodName; + this.dwarfInfo = "(" + fileName + ":" + lineNo + ")"; + } + + public void checkConstraint(int currentFrameIdx, String line) { + if (frameIdx == currentFrameIdx) { + Asserts.assertTrue(line.contains(methodName), "Could not find method name " + methodName + " in \"" + line + "\""); + Asserts.assertTrue(line.contains(dwarfInfo) , "Could not find DWARF info " + dwarfInfo + " in \"" + line + "\""); + } + } +} + -- 1.8.3.1
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2