-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathvm_disassembler.py
152 lines (124 loc) · 4.52 KB
/
vm_disassembler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python3
import sys
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.core.locationdb import LocationDB
from miasm.expression.expression import *
from miasm.expression.simplifications import expr_simp
from miasm.ir.symbexec import SymbolicExecutionEngine
# Entry addresses of the VM handlers, hardcoded from the analyzed binary.
# Kept as a set so the worklist loop can test membership directly.
VM_HANDLERS = {
    0x11a9,
    0x11c4,
    0x11e1,
    0x11f1,
    0x1226,
    0x1238,
    0x1245,
    0x1262,
    0x126d,
    0x1281,
    0x129e,
}
def constraint_memory(address, num_of_bytes):
    """
    Build a (memory expression, constant value) pair for a region of the
    binary, used to pre-configure the symbolic execution engine for
    concolic execution.

    `num_of_bytes` bytes are fetched at `address` from the module-level
    `container`. The returned memory expression is an `ExprMem` at the
    64-bit address `address` spanning `num_of_bytes * 8` bits; the returned
    value is an `ExprInt` of the same width holding the fetched bytes
    interpreted as a single little-endian integer.
    """
    # both expressions cover the whole region, so they share one bit width
    bit_width = num_of_bytes * 8

    # raw bytes as stored in the binary
    raw = container.bin_stream.getbytes(address, num_of_bytes)

    # symbolic memory location covering the region
    region = ExprMem(ExprInt(address, 64), bit_width)

    # concrete content of the region as one wide integer
    content = ExprInt(int.from_bytes(raw, byteorder='little'), bit_width)

    return region, content
def disassemble(sb, address):
    """
    Callback that dumps information about a single VM handler.

    Looks up the value currently bound to RDX in the symbolic state of `sb`
    (used as the virtual instruction pointer) and, if `address` is one of
    the known handler addresses, prints one line of the form
    "<vip>: handler <address>". Unknown addresses print nothing.
    """
    # current value of the virtual instruction pointer
    vip = sb.symbols[ExprId("RDX", 64)]

    # every known handler currently produces the same generic output line
    known_handlers = (
        0x129e,
        0x1238,
        0x126d,
        0x11c4,
        0x1262,
        0x11a9,
        0x1245,
        0x11f1,
        0x11e1,
        0x1281,
        0x1226,
    )

    # Template for handler-specific decoding, e.g. for 0x11e1 (a PUSH
    # according to the original notes):
    #   operand_addr = expr_simp(vip + ExprInt(1, 64))
    #   constant = expr_simp(sb.symbols[ExprMem(operand_addr, 32)])
    #   print(f"{vip}: PUSH {constant}")
    if int(address) in known_handlers:
        print(f"{vip}: handler {address}")
# check arguments
if len(sys.argv) != 2:
    print(f"[*] Syntax: {sys.argv[0]} <file>")
    # usage error: leave with a non-zero status via sys.exit (the bare
    # `exit` helper is provided by `site` for interactive use)
    sys.exit(1)

# parse file path
file_path = sys.argv[1]

# address of vm entry
start_addr = 0x115a

# init symbol table
loc_db = LocationDB()

# read binary file; the handle is only needed while the container is built,
# so close it right afterwards instead of leaking it
with open(file_path, 'rb') as binary_file:
    container = Container.from_stream(binary_file, loc_db)

# get CPU abstraction
machine = Machine(container.arch)

# disassembly engine
mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

# initialize lifter to intermediate representation
lifter = machine.lifter_model_call(mdis.loc_db)

# disassemble the function at address
asm_cfg = mdis.dis_multiblock(start_addr)

# translate asm_cfg into ira_cfg
ira_cfg = lifter.new_ircfg_from_asmcfg(asm_cfg)

# init SE engine
sb = SymbolicExecutionEngine(lifter)

# constrain bytecode -- start address and size (highest address - lowest address)
sym_address, sym_value = constraint_memory(0x4060, 0x4140 - 0x4060)
sb.symbols[sym_address] = sym_value

# constrain VM input (rdi, first function argument). The value in `ExprInt`
# represents the function's input value.
rdi = ExprId("RDI", 64)
sb.symbols[rdi] = ExprInt(2, 64)

# init worklist
basic_block_worklist = [ExprInt(start_addr, 64)]

# worklist algorithm: follows execution block by block and stops as soon as
# the next destination is neither an integer nor a label
while basic_block_worklist:
    # get current block
    current_block = basic_block_worklist.pop()
    # print(f"current block: {current_block}")

    # if current block is a VM handler, dump handler-specific knowledge
    if current_block.is_int() and int(current_block) in VM_HANDLERS:
        disassemble(sb, current_block)

    # symbolically execute block -> next_block: symbolic value/address to execute
    next_block = sb.run_block_at(ira_cfg, current_block, step=False)
    # print(f"next block: {next_block}")

    # if next block is an integer or a label, continue execution
    if next_block.is_int() or next_block.is_loc():
        basic_block_worklist.append(next_block)

# dump symbolic state
# sb.dump()

# dump VMs/functions' return value -- only works if SE runs until the end
# rax = ExprId("RAX", 64)
# value = sb.symbols[rax]
# print(f"VM return value: {value}")