diff --git a/src/rvprof_symbols.c b/src/rvprof_symbols.c index 57e0dc6..d130a4d 100644 --- a/src/rvprof_symbols.c +++ b/src/rvprof_symbols.c @@ -64,5 +64,172 @@ static int add_symbol(uintptr_t addr, const char* name, size_t size){ g_rvprof.symbols.data[g_rvprof.symbols.size].size = size; g_rvprof.symbols.size++; + return 0; +} + +// ELF symbol parsing +static int parse_elf_symbols(const char* filepath){ + int fp = open(filepath, O_RDONLY); + if (fp< 0) return -1; + + struct stat st; + if (fstat(fp, &st) < 0){ + close(fp); + return -1; + } + + // sanity check: file size, max fort executables defined here: 1 GiB + if (st.st_size < (off_t)sizeof(Elf64_Ehdr) || st.st_size > 1024*1024*1024){ + close(fp); + return -1; + } + + void* mapped = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fp, 0); + close(fp); + + // check ELF magic with bounds checkuing + if (st.st_size < 16){ + munmap(mapped, st.st_size); + return -1; + } + + unsigned char* elf_data = (unsigned char*)mapped; + if (elf_data[0] != 0x7f || elf_data[1] != 'E' || elf_data[2] != 'L' || elf_data != 'F'){ + // not an ELF file + munmap(mapped, st.st_size); + return -1; + } + + // 32 or 64 bit ELF ? + int is_64bit = (elf_data[4] == 2); + + if(is_64bit){ + if(st.st_size < (off_t)sizeof(Elf64_Ehdr)){ + // mismatch + munmap(mapped, st.st_size); + return -1; + } + + Elf64_Ehdr* ehdr = (Elf64_Ehdr*)mapped; + // bounds checking: section header table + if (ehdr->e_shoff == 0 || ehdr->e_shnum == 0 || ehdr->e_shoff + (ehdr->e_shnum * sizeof(Elf64_Shdr)) > (size_t)st.st_size){ + munmap(mapped, st.st_size); + return -1; + } + + Elf64_Shdr* shdrs = (Elf64_Shdr*)((char*)mapped + ehdr->e_shoff); + // bounds checking: string table index + if (ehdr->e_shstrndx >= ehdr->e_shnum){ + munmap(mapped, st.st_size); + return -1; + } + + // parse .symtab and .dynsym sections + for (int i=0; ie_shnum; i++){ + Elf64_Shdr *shdr = &shdrs[i]; + + if (shdr->sh_type == SHT_SYMTAB || shdr->sh_type == SHT_DYNSYM){ + // bounds checking: symbol table + if (shdr->sh_offset + shdr->sh_size > (size_t)st.st_size) continue; + if (shdr->sh_size == 0 || shdr->sh_size % sizeof(Elf64_Sym) != 0) continue; + + // find associated string table + if (shdr->sh_link >= ehdr->e_shnum) continue; + + Elf64_Shdr* strtab_shdr = &shdrs[shdr->sh_link]; + // bounds checking: string table + if (strtab_shdr->sh_offset + strtab_shdr->sh_size > (size_t)st.st_size) continue; + if (strtab_shdr->sh_size == 0) continue; + + char* strtab = (char*)mapped + strtab_shdr->sh_offset; + Elf64_Sym* symbols = (Elf64_Sym*)((char*)mapped + shdr->sh_offset); + int num_syms = shdr->sh_size / sizeof(Elf64_Sym); + for (int j=0; jst_info) != STT_FUNC) continue; + if (sym->st_value == 0) continue; + if (sym->st_name == 0) continue; + if (sym->st_name >= strtab_shdr->sh_size) continue; + + char* name = &strtab[sym->st_name]; + + // verify null termination within bounds + int name_len = strnlen(name, strtab_shdr->sh_size - sym->st_name); + if (name_len == 0 || name_len >= (int)(strtab_shdr->sh_size - sym->st_name)) continue; + + add_symbol(sym->st_value, name, sym->st_size); + } + + } + } + + } else{ + // 32-bit case (similar logic) + if (st.st_size < (off_t)sizeof(Elf32_Ehdr)){ + munmap(mapped, st.st_size); + return -1; + } + + Elf32_Ehdr* ehdr = (Elf32_Ehdr*)mapped; + + if (ehdr->e_shoff == 0 || ehdr->e_shnum == 0 || + ehdr->e_shoff + (ehdr->e_shnum * sizeof(Elf32_Shdr)) > (size_t)st.st_size){ + munmap(mapped, st.st_size); + return -1; + } + + Elf32_Shdr* shdrs = (Elf32_Shdr*)((char*)mapped + ehdr->e_shoff); + + if (ehdr->e_shstrndx >= ehdr->e_shnum){ + munmap(mapped, st.st_size); + return -1; + } + + for (int i = 0; i < ehdr->e_shnum; i++){ + Elf32_Shdr* shdr = &shdrs[i]; + + if (shdr->sh_type == SHT_SYMTAB || shdr->sh_type == SHT_DYNSYM){ + if (shdr->sh_offset + shdr->sh_size > (size_t)st.st_size) continue; + if (shdr->sh_size == 0 || shdr->sh_size % sizeof(Elf32_Sym) != 0) continue; + + if (shdr->sh_link >= ehdr->e_shnum) continue; + + Elf32_Shdr* strtab_shdr = &shdrs[shdr->sh_link]; + + if (strtab_shdr->sh_offset + strtab_shdr->sh_size > (size_t)st.st_size) continue; + if (strtab_shdr->sh_size == 0) continue; + + char* strtab = (char*)mapped + strtab_shdr->sh_offset; + Elf32_Sym* symbols = (Elf32_Sym*)((char*)mapped + shdr->sh_offset); + int num_syms = shdr->sh_size / sizeof(Elf32_Sym); + + for (int j = 0; j < num_syms; j++){ + Elf32_Sym* sym = &symbols[j]; + + if (ELF32_ST_TYPE(sym->st_info) != STT_FUNC) continue; + if (sym->st_value == 0) continue; + if (sym->st_name == 0) continue; + if (sym->st_name >= strtab_shdr->sh_size) continue; + + char* name = &strtab[sym->st_name]; + + int name_len = strnlen(name, strtab_shdr->sh_size - sym->st_name); + if (name_len == 0 || name_len >= (int)(strtab_shdr->sh_size - sym->st_name)) continue; + + add_symbol(sym->st_value, name, sym->st_size); + } + } + } + } + + munmap(mapped, st.st_size); + + // sort symbols by address for quick binary search + if (g_rvprof.symbols.size > 0){ + qsort(g_rvprof.symbols.data, g_rvprof.symbols.size, sizeof(symbol_entry_t), symbol_compare); + g_rvprof.symbols_loaded = 1; + } + return 0; } \ No newline at end of file