170 lines
5.8 KiB
Python
170 lines
5.8 KiB
Python
#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
|
|
|
|
from ctypes import CFUNCTYPE
|
|
from ctypes import POINTER
|
|
from ctypes import addressof
|
|
from ctypes import c_byte
|
|
from ctypes import c_char_p
|
|
from ctypes import c_int
|
|
from ctypes import c_size_t
|
|
from ctypes import c_ubyte
|
|
from ctypes import c_uint64
|
|
from ctypes import c_void_p
|
|
from ctypes import cast
|
|
|
|
from .common import LLVMObject
|
|
from .common import c_object_p
|
|
from .common import get_library
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = ['Disassembler']

# Handle to the LLVM shared library; every foreign call in this module goes
# through it.
lib = get_library()

# Registry of ctypes callback prototypes, keyed by name. It starts empty and
# is filled in near the bottom of this module, before register_library(lib)
# reads it.
callbacks = dict()
|
|
|
|
# Constants for set_options
Option_UseMarkup = 1

# Set to True by _ensure_initialized() once the per-target initializers have
# been attempted, so that work happens at most once.
_initialized = False

# Target names spliced into "LLVMInitialize<target><component>" symbol names
# by _ensure_initialized().
_targets = [
    'AArch64',
    'ARM',
    'Hexagon',
    'MSP430',
    'Mips',
    'NVPTX',
    'PowerPC',
    'R600',
    'Sparc',
    'SystemZ',
    'X86',
    'XCore',
]
|
|
def _ensure_initialized():
    """Run the LLVM target initializers the first time this is called.

    For every name in ``_targets`` the TargetInfo, TargetMC and Disassembler
    initializer symbols are looked up on ``lib``; symbols the library does
    not export are skipped, the rest are called. Later calls are no-ops.
    """
    global _initialized
    if _initialized:
        return

    # Ideally this would call LLVMInitializeAll{TargetInfo,TargetMC,
    # Disassembler}s, but those are only static inline functions in the
    # llvm-c headers, so they do not exist as symbols in the shared library.
    # Until that is fixed, probe for each per-target initializer instead.
    for target_name in _targets:
        for component in ("TargetInfo", "TargetMC", "Disassembler"):
            symbol = "LLVMInitialize" + target_name + component
            init_fn = getattr(lib, symbol, None)
            if init_fn is None:
                # This library build does not provide that initializer.
                continue
            init_fn()

    _initialized = True
|
|
|
|
|
|
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """

    # Size, in bytes, of the buffer handed to LLVMDisasmInstruction to receive
    # the text of one instruction.
    _OUT_STR_SIZE = 255

    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. It may be given as a byte
        string or as text; text is encoded as UTF-8 before being handed to
        LLVM.

        Raises Exception if LLVM returns no disassembler for the triple.
        """

        _ensure_initialized()

        # c_char_p only accepts byte strings, so a text triple (str on
        # Python 3, unicode on Python 2) has to be encoded first. Any other
        # value is passed through untouched, exactly as before.
        if isinstance(triple, type(u'')):
            triple_bytes = triple.encode('utf-8')
        else:
            triple_bytes = triple

        # Both callbacks are passed as null function pointers.
        ptr = lib.LLVMCreateDisasm(c_char_p(triple_bytes), c_void_p(None),
                                   c_int(0), callbacks['op_info'](0),
                                   callbacks['symbol_lookup'](0))
        if not ptr:
            # Report the triple as the caller supplied it.
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    @staticmethod
    def _as_byte_string(source):
        """Return source in a form that c_char_p accepts.

        A bytearray is copied into an immutable byte string, because c_char_p
        rejects bytearray objects. Every other value is returned unchanged.
        """
        if isinstance(source, bytearray):
            return bytes(source)
        return source

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string (str on Python 2, bytes on
        Python 3) or a bytearray.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction, as read back from a ctypes c_char_p
            (a byte string).
        """
        source = self._as_byte_string(source)
        buf = cast(c_char_p(source), POINTER(c_ubyte))
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str,
                                           self._OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        source = self._as_byte_string(source)
        source_bytes = c_char_p(source)
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        # View the input as a fixed-size array so that addressof() gives its
        # base address, from which each instruction's start is computed.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                                               c_uint64(len(source) - offset),
                                               c_uint64(address), out_str,
                                               self._OUT_STR_SIZE)

            # Zero bytes consumed means nothing more could be decoded.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Apply the given option flags to this disassembler.

        The options argument is passed to LLVMSetDisasmOptions as a 64-bit
        unsigned value; see the module-level Option_* constants.

        Raises Exception if LLVMSetDisasmOptions returns a false value.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
|
|
|
|
|
|
def register_library(library):
    """Declare ctypes prototypes for the disassembler functions of library.

    Sets argtypes (and restype, where one is declared) on LLVMCreateDisasm,
    LLVMDisasmDispose, LLVMDisasmInstruction and LLVMSetDisasmOptions. The
    callback prototypes are read from the module-level ``callbacks`` dict,
    so 'op_info' and 'symbol_lookup' must already be present in it.
    """
    # LLVMCreateDisasm: triple, opaque info pointer, tag type, two callbacks.
    create_args = [c_char_p, c_void_p, c_int,
                   callbacks['op_info'], callbacks['symbol_lookup']]
    create_fn = library.LLVMCreateDisasm
    create_fn.argtypes = create_args
    create_fn.restype = c_object_p

    # LLVMDisasmDispose: only the argument list is declared here.
    dispose_fn = library.LLVMDisasmDispose
    dispose_fn.argtypes = [Disassembler]

    # LLVMDisasmInstruction: input bytes, their size, pc, output buffer and
    # its size; the result is declared as a size_t.
    decode_args = [Disassembler, POINTER(c_ubyte),
                   c_uint64, c_uint64, c_char_p, c_size_t]
    decode_fn = library.LLVMDisasmInstruction
    decode_fn.argtypes = decode_args
    decode_fn.restype = c_size_t

    # LLVMSetDisasmOptions: option bits in, int status out.
    options_fn = library.LLVMSetDisasmOptions
    options_fn.argtypes = [Disassembler, c_uint64]
    options_fn.restype = c_int
|
|
|
|
|
|
# ctypes prototypes for the two callback parameters of LLVMCreateDisasm.
# They have to be registered before register_library(lib) runs, because that
# function reads both entries to build the LLVMCreateDisasm argument list.
callbacks.update(
    op_info=CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                      c_int, c_void_p),
    symbol_lookup=CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                            POINTER(c_uint64), c_uint64,
                            POINTER(c_char_p)),
)

# Attach the prototypes to the library loaded at import time.
register_library(lib)
|