您的位置:首页 > 编程语言

看开源代码如何解析ELF文件

2016-06-19 00:54 671 查看
工具ROPgadget

在ROPgadget中有识别并分析多种文件结构,这次主要用这个功能来分析ELF文件格式。

分析的文件为libc.so

上代码

class Binary:
    """Load a binary from disk and wrap it in a format-specific parser.

    The parser is selected either from the caller's raw-mode options or
    from the magic bytes found at the very start of the file.
    """

    def __init__(self, options):
        """Read ``options.binary`` and pick the matching parser.

        Args:
            options: Object exposing ``binary`` (path of the file to load)
                and ``rawArch`` / ``rawMode``. When both raw options are
                truthy the file is handed to ``Raw`` without sniffing its
                magic bytes.

        If the file cannot be read, or its format is not recognised, an
        error is printed and the internal parser stays ``None``.
        """
        self.__fileName = options.binary
        self.__rawBinary = None
        self.__binary = None

        try:
            # The context manager closes the handle even if read() fails.
            with open(self.__fileName, "rb") as fd:
                self.__rawBinary = fd.read()
        except (IOError, OSError, TypeError, ValueError):
            # Covers every way open()/read() rejects a path, without
            # swallowing KeyboardInterrupt/SystemExit like a bare except.
            print("[Error] Can't open the binary or binary not found")
            return

        magic = self.__rawBinary[:4]

        if options.rawArch and options.rawMode:
            self.__binary = Raw(self.__rawBinary, options.rawArch, options.rawMode)
        elif magic == unhexlify(b"7f454c46"):
            # 0x7f 'E' 'L' 'F'
            self.__binary = ELF(self.__rawBinary)
        elif magic[:2] == unhexlify(b"4d5a"):
            # 'M' 'Z'
            self.__binary = PE(self.__rawBinary)
        elif magic == unhexlify(b"cafebabe"):
            self.__binary = UNIVERSAL(self.__rawBinary)
        elif magic in (unhexlify(b"cefaedfe"), unhexlify(b"cffaedfe")):
            self.__binary = MACHO(self.__rawBinary)
        else:
            print("[Error] Binary format not supported")
            return
在binary.py中进行文件类型判定,ELF文件最开始四个字节‘7f454c46’

class ELFFlags:
    """Numeric constants used when decoding an ELF header."""

    # Indexes into the e_ident array at the start of the file.
    EI_CLASS    = 0x04
    EI_DATA     = 0x05

    # Values of e_ident[EI_CLASS]: 32-bit or 64-bit file.
    ELFCLASS32  = 0x01
    ELFCLASS64  = 0x02

    # Values of e_ident[EI_DATA]: little-endian or big-endian encoding.
    ELFDATA2LSB = 0x01
    ELFDATA2MSB = 0x02

    # Values of the e_machine header field (target CPU).
    EM_386      = 0x03
    EM_MIPS     = 0x08
    EM_SPARCv8p = 0x12
    EM_PowerPC  = 0x14
    EM_ARM      = 0x28
    EM_X86_64   = 0x3e
    EM_ARM64    = 0xb7


class ELF:
    """Parsed view of an ELF image: file header, section headers, program headers."""

    def __init__(self, binary):
        """Parse ``binary`` (the raw file contents) eagerly.

        Args:
            binary: Raw bytes of the ELF file.
        """
        # bytearray indexing yields integers, which the header checks
        # compare directly against the ELFFlags values.
        self.__binary    = bytearray(binary)
        self.__ElfHeader = None
        self.__shdr_l    = []
        self.__phdr_l    = []

        # Order matters: both tables are located through header fields.
        self.__setHeaderElf()
        self.__setShdr()
        self.__setPhdr()
ELF文件类初始化

def __setHeaderElf(self):
    """Decode the ELF file header into ``self.__ElfHeader`` (method of ``ELF``).

    The class byte (32/64-bit) and data byte (endianness) of e_ident
    select which header structure the start of the file is copied into.

    Returns:
        None. When the class or data byte is invalid an error is printed
        and ``self.__ElfHeader`` is left untouched.
    """
    # e_ident is the 16-byte identification array at the start of the file.
    e_ident = self.__binary[:16]

    ei_class = e_ident[ELFFlags.EI_CLASS]
    ei_data  = e_ident[ELFFlags.EI_DATA]

    if ei_class not in (ELFFlags.ELFCLASS32, ELFFlags.ELFCLASS64):
        print("[Error] ELF.__setHeaderElf() - Bad Arch size")
        return None

    if ei_data not in (ELFFlags.ELFDATA2LSB, ELFFlags.ELFDATA2MSB):
        print("[Error] ELF.__setHeaderElf() - Bad architecture endian")
        return None

    # Both bytes are validated above, so each has exactly two possible values.
    little_endian = ei_data == ELFFlags.ELFDATA2LSB
    if ei_class == ELFFlags.ELFCLASS32:
        header_type = Elf32_Ehdr_LSB if little_endian else Elf32_Ehdr_MSB
    else:
        header_type = Elf64_Ehdr_LSB if little_endian else Elf64_Ehdr_MSB

    self.__ElfHeader = header_type.from_buffer_copy(self.__binary)

    self.getArch() # Check if architecture is supported
设置ELF文件头

ELF前16个字节称为魔数

其中前四字节之前已经说过了,第一个字符是ASCII字符中的DEL控制符,后三个是“ELF”三个字母的ASCII码

第5个字节为Class位,0为无效文件,1为32位文件,2为64位文件

第6个字节指定字节序(Data)有以下取值

0 无效格式

1 小端格式

2 大端格式

根据5和6字节信息选择相应的拷贝方式(如32位小端等)

def getArch(self):
    """Map the header's ``e_machine`` value to a ``CS_ARCH_*`` constant.

    Returns:
        The matching ``CS_ARCH_*`` value (defined outside this snippet,
        presumably by Capstone), or None after printing an error when
        the machine type is not one of the handled ones.
    """
    machine = self.__ElfHeader.e_machine

    if machine in (ELFFlags.EM_386, ELFFlags.EM_X86_64):
        return CS_ARCH_X86
    elif machine == ELFFlags.EM_ARM:
        return CS_ARCH_ARM
    elif machine == ELFFlags.EM_ARM64:
        return CS_ARCH_ARM64
    elif machine == ELFFlags.EM_MIPS:
        return CS_ARCH_MIPS
    elif machine == ELFFlags.EM_PowerPC:
        return CS_ARCH_PPC
    elif machine == ELFFlags.EM_SPARCv8p:
        return CS_ARCH_SPARC
    else:
        print("[Error] ELF.getArch() - Architecture not supported")
        return None
e_machine是一个双字节(19,20字节)的表示CPU平台属性的成员

之后执行函数

def __setShdr(self):
    """Read the section header table into ``self.__shdr_l`` (method of ``ELF``).

    ``e_shnum`` entries of ``e_shentsize`` bytes each are decoded starting
    at file offset ``e_shoff``. When ``e_shstrndx`` is non-zero, every
    entry also gets a ``str_name`` attribute looked up in the section
    name string table.
    """
    shdr_num  = self.__ElfHeader.e_shnum        # number of section headers
    shentsize = self.__ElfHeader.e_shentsize    # size of one table entry
    offset    = self.__ElfHeader.e_shoff        # file offset of the table

    ei_data = self.__binary[ELFFlags.EI_DATA]

    for _ in range(shdr_num):
        # Decode in place at `offset` instead of re-slicing the remaining
        # file contents on every iteration.
        mode = self.getArchMode()
        if mode == CS_MODE_32:
            if ei_data == ELFFlags.ELFDATA2LSB:
                shdr = Elf32_Shdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                shdr = Elf32_Shdr_MSB.from_buffer_copy(self.__binary, offset)
        elif mode == CS_MODE_64:
            if ei_data == ELFFlags.ELFDATA2LSB:
                shdr = Elf64_Shdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                shdr = Elf64_Shdr_MSB.from_buffer_copy(self.__binary, offset)

        self.__shdr_l.append(shdr)
        offset += shentsize

    # setup name from the strings table
    if self.__ElfHeader.e_shstrndx != 0:
        strtab_offset = self.__shdr_l[self.__ElfHeader.e_shstrndx].sh_offset
        # bytes(), not str(): on Python 3 str(bytearray) returns the repr
        # text ("bytearray(b'...')"), which would corrupt every name.
        string_table = bytes(self.__binary[strtab_offset:])
        for i in range(shdr_num):
            raw_name = string_table[self.__shdr_l[i].sh_name:].split(b"\x00")[0]
            self.__shdr_l[i].str_name = raw_name.decode("utf-8", "replace")
该函数处理段头部

第一行self.__ElfHeader.e_shnum表示段数量,e_shnum也是一个双字节成员(49,50字节),在本机的实际运行中可以看到libc.so的段数量为32(好TM多)

第二行self.__ElfHeader.e_shoff代表段表在文件中的偏移,32位版本中为4字节(33,34,35,36字节),实际运行值为0x4b88f

之后同样按照32位小端格式从段表中拷贝添加到self.__shdr_l

self.__ElfHeader.e_shentsize指段表描述符大小,双字节(47,48字节),实际运行值40

self.__ElfHeader.e_shstrndx指段表字符串表所在段在段表中的下标,双字节(51,52字节),实际运行值为31,也就是说段表中最后一个段是段表字符串表所在段(好拗口)

根据这个值找到字符串所在段,然后依次分给各段

下一个函数

def __setPhdr(self):
    """Read the program header table into ``self.__phdr_l`` (method of ``ELF``).

    ``e_phnum`` entries of ``e_phentsize`` bytes each are decoded starting
    at file offset ``e_phoff``.
    """
    phdr_num  = self.__ElfHeader.e_phnum        # number of program headers
    phentsize = self.__ElfHeader.e_phentsize    # size of one table entry
    offset    = self.__ElfHeader.e_phoff        # file offset of the table

    ei_data = self.__binary[ELFFlags.EI_DATA]

    for _ in range(phdr_num):
        # Decode in place at `offset` instead of re-slicing the remaining
        # file contents on every iteration.
        mode = self.getArchMode()
        if mode == CS_MODE_32:
            if ei_data == ELFFlags.ELFDATA2LSB:
                phdr = Elf32_Phdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                phdr = Elf32_Phdr_MSB.from_buffer_copy(self.__binary, offset)
        elif mode == CS_MODE_64:
            if ei_data == ELFFlags.ELFDATA2LSB:
                phdr = Elf64_Phdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                phdr = Elf64_Phdr_MSB.from_buffer_copy(self.__binary, offset)

        self.__phdr_l.append(phdr)
        offset += phentsize
self.__ElfHeader.e_phnum是ELF执行视图中Segment的个数,双字节(45,46),实际结果9

self.__ElfHeader.e_phoff是Segment表(程序头表)在文件中的偏移,32位版本中为4字节(29,30,31,32字节);代码中用到的self.__ElfHeader.e_phentsize是每个Segment描述符的大小,双字节(43,44)

之后和段表一样,放入self.__phdr_l
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: