Skip to content

Commit 124a627

Browse files
committed
Pharos batch commit July 5, 2022
This version of Pharos requires a more recent version of SWI Prolog: v8.5.12. * Adjustments for new versions of ROSE * Fix problem with definers in LEA instructions * Introduce a new fact, thisPtrDefinition, that will assist with future facts describing how to access virtual bases. * New reasonObjectInObject_F rule * Fix VFTableBelongsToClass rule * Export information about inlined vftable writes * Overhaul of the GTE_D rule * Refine some rules to require negative offsets * Add ability to lift symbolic expressions to Prolog Fix #179 Fix #224 Fix #209 Fix #212 Fix #204
1 parent af54b6a commit 124a627

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+32300
-29099
lines changed

.dir-locals.el

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
(
2-
(nil . ((indent-tabs-mode . nil)
3-
(fill-column . 95)))
2+
(nil . ((indent-tabs-mode . nil)))
3+
(prog-mode . ((fill-column . 95)))
44
(c-mode . ((c-basic-offset . 2)
55
(c-block-comment-prefix . "")
66
(comment-start . "// ")

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,4 @@ local.cmake
5050
**/.tramp_history
5151
**/.tramp_ignore
5252
**/.gdb_history
53+
**/*/*.pdb

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ endif()
6464

6565
# Rose
6666

67-
# Anyone modifying this version should also manually modify libpharos/version.hpp.in. They should set the PHAORS_ROSE_MINI
67+
# Anyone modifying this version should also manually modify libpharos/version.hpp.in. They
68+
# should set the PHAROS_ROSE_MINIMUM_VERSION variable to match.
6869
set(PHAROS_ROSE_MINIMUM_VERSION 0.11.39.6)
6970

7071
find_package(Rose ${PHAROS_ROSE_MINIMUM_VERSION} REQUIRED)

INSTALL.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ The latest release version of Z3 (4.8.7) has passed our testing.
196196

197197
We build Z3 using commands like these:
198198
```
199-
$ git clone -b Z3-4.8.7 https://github.com/Z3Prover/z3.git
199+
$ git clone -b z3-4.8.7 https://github.com/Z3Prover/z3.git
200200
$ cd z3
201201
$ mkdir build
202202
$ cd build
@@ -272,11 +272,11 @@ SWI Prolog version 8.3.19 or later fixes a bug in Prolog that can be
272272
triggered by ooanalyzer in some rare cases. See
273273
https://github.com/cmu-sei/pharos/issues/156 for more details.
274274

275-
These commands will build and install version 8.3.16 to
275+
These commands will build and install version 8.5.12 to
276276
$SWIPL_LOCATION:
277277

278278
```
279-
$ git clone --recursive -b V8.3.19 --depth 1 https://github.com/swi-prolog/swipl-devel
279+
$ git clone --recursive -b V8.5.12 --depth 1 https://github.com/swi-prolog/swipl-devel
280280
$ cd swipl-devel
281281
$ mkdir build
282282
$ cd build

libpharos/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ set(PHAROS_SOURCES
3939
pdg.cpp
4040
prolog.cpp
4141
prologimpl.cpp
42+
prolog_symexp.cpp
4243
revision.cpp
4344
riscops.cpp
4445
semantics.cpp
@@ -65,7 +66,7 @@ set(PHAROS_SOURCES
6566
)
6667

6768
# Packages needed by libpharos
68-
find_package(SWIPL "8.2.0" REQUIRED)
69+
find_package(SWIPL "8.3.16" REQUIRED)
6970
find_package(SQLite REQUIRED)
7071
set(CURSES_NEED_NCURSES true)
7172
find_package(Curses REQUIRED)

libpharos/defuse.cpp

+40-24
Original file line numberDiff line numberDiff line change
@@ -806,35 +806,13 @@ DUAnalysis::fake_read(SgAsmX86Instruction* insn, const AbstractAccess& aa) const
806806
// This approach is a little lazy but I didn't want to have to write my own visitor class to
807807
// look for a specific subexpression.
808808

809-
// JSG updated this with Robb M's guideance on searching for a subexpression.
810-
using VisitAction = Rose::BinaryAnalysis::SymbolicExpr::VisitAction;
811-
using Visitor = Rose::BinaryAnalysis::SymbolicExpr::Visitor;
812-
813809
TreeNodePtr read_tn = aa.value->get_expression();
814810

815-
// The visitor needed to search for the fake read (subexpression)
816-
struct SubxVisitor : Visitor {
817-
TreeNodePtr needle_;
818-
819-
SubxVisitor(const TreeNodePtr &needle) : needle_(needle) { }
820-
821-
VisitAction preVisit(const TreeNodePtr &expr) {
822-
if (expr->isEquivalentTo(needle_)) {
823-
return Rose::BinaryAnalysis::SymbolicExpr::TERMINATE;
824-
}
825-
return Rose::BinaryAnalysis::SymbolicExpr::CONTINUE;
826-
}
827-
VisitAction postVisit(const TreeNodePtr&) {
828-
return Rose::BinaryAnalysis::SymbolicExpr::CONTINUE;
829-
}
830-
};
831-
832811
for (const AbstractAccess& waa : get_writes(insn->get_address())) {
833812
TreeNodePtr write_tn = waa.value->get_expression();
834-
SubxVisitor read_subx(read_tn);
835813

836814
// search if this write includes the read. If it does, then it is not fake
837-
if (write_tn->depthFirstTraversal(read_subx) == Rose::BinaryAnalysis::SymbolicExpr::TERMINATE) {
815+
if (has_subexp (write_tn, read_tn)) {
838816
// The read was somewhere in the write, so the value was _truly_ read, and we've already
839817
// established that it was initialized by someone else, so it's not a truly uninitialized
840818
// read.
@@ -1291,7 +1269,7 @@ DUAnalysis::make_call_dependencies(SgAsmX86Instruction* insn, SymbolicStatePtr&
12911269
// the parameter onto the stack before the call. This code is a little bit incorrect in
12921270
// that we're assuming that all four bytes have still been modified by the same instruction
12931271
// as the first byte was.
1294-
SymbolicValuePtr mca = SymbolicValue::promote(memcell->get_address());
1272+
SymbolicValuePtr mca = SymbolicValue::promote(memcell->address());
12951273

12961274
// Use rops->readMemory to read memory to read either 32 or 64 bits depending on the
12971275
// architecture. In truth, we have no idea how large the object being pointed to is and
@@ -1380,6 +1358,39 @@ void update_jump_targets(const DescriptorSet& ds, SgAsmInstruction* insn) {
13801358
}
13811359
}
13821360

1361+
void DUAnalysis::guess_function_delta() {
1362+
// Because it's shorter than current_function...
1363+
FunctionDescriptor* fd = current_function;
1364+
1365+
1366+
for (CFGVertex vertex : fd->get_return_vertices()) {
1367+
const SgAsmBlock *block = convert_vertex_to_bblock(cfg, vertex);
1368+
// Find the return instruction.
1369+
const SgAsmStatementPtrList &insns = block->get_statementList();
1370+
assert(insns.size() > 0);
1371+
SgAsmX86Instruction *last = isSgAsmX86Instruction(insns[insns.size() - 1]);
1372+
if (last != NULL and last->get_kind() == x86_ret) {
1373+
// Get the number of bytes popped off the stack by the return instruction (not counting the
1374+
// return address).
1375+
SgAsmExpressionPtrList &ops = last->get_operandList()->get_operands();
1376+
if (ops.size() > 0) {
1377+
SgAsmIntegerValueExpression *val = isSgAsmIntegerValueExpression(ops[0]);
1378+
if (val != NULL) {
1379+
// This is the number on the RETN instruction and is also our expected stack delta.
1380+
int64_t retn_size = val->get_absoluteValue();
1381+
SDEBUG << "Guessing that function " << fd->address_string() << " pops "
1382+
<< retn_size << " additional bytes off the stack." << LEND;
1383+
// Mak a StackDelta() obect and record it as _our_ stack delta. This value will get
1384+
// used if the analyzed function is recursive and calls itself.
1385+
StackDelta guessed = StackDelta(retn_size, ConfidenceGuess);
1386+
fd->update_stack_delta(guessed);
1387+
return;
1388+
}
1389+
}
1390+
}
1391+
}
1392+
}
1393+
13831394
void DUAnalysis::update_function_delta() {
13841395
// Because it's shorter than current_function...
13851396
FunctionDescriptor* fd = current_function;
@@ -2196,6 +2207,11 @@ DUAnalysis::solve_flow_equation_iteratively()
21962207
sp_tracker.update_delta(current_function->get_address(), StackDelta(0, ConfidenceCertain),
21972208
sd_failures);
21982209

2210+
// If this function is recursive, we need a better guess for our own stack delta than just
2211+
// zero. Make that guess and store it in the stack delta tracker cache before starting
2212+
// anlaysis.
2213+
guess_function_delta();
2214+
21992215
// Do most of the real work. Visit each block in the control flow graph, possibly multiple
22002216
// times, merging predecessor states, emulating each block, and updating state histories.
22012217
rstatus = loop_over_cfg();

libpharos/defuse.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ class DUAnalysis {
259259
LimitCode evaluate_bblock(SgAsmBlock* bblock);
260260
void update_call_targets(SgAsmX86Instruction* insn, SymbolicRiscOperatorsPtr& ops);
261261
void update_function_delta();
262+
void guess_function_delta();
262263
void make_call_dependencies(SgAsmX86Instruction* insn, SymbolicStatePtr& cstate);
263264

264265
// Report debugging messages. No analysis.

libpharos/descriptors.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2015-2021 Carnegie Mellon University. See LICENSE file for terms.
1+
// Copyright 2015-2022 Carnegie Mellon University. See LICENSE file for terms.
22

33
#include <boost/graph/graph_traits.hpp>
44
#include <boost/graph/adjacency_list.hpp>
@@ -270,7 +270,7 @@ void partition(const ProgOptVarMap & vm)
270270
auto file = pfile.native();
271271
if (!vm.count("serialize")) {
272272
auto filename = pfile.filename().native();
273-
auto sername = filename + ".serialized";
273+
auto sername = bf::path{filename + ".serialized"};
274274
auto vmcopy = vm;
275275
vmcopy.emplace("serialize"s,
276276
boost::program_options::variable_value{boost::any{sername}, false});

libpharos/ir.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2015-2019, 2021 Carnegie Mellon University. See LICENSE file for terms.
1+
// Copyright 2015-2022 Carnegie Mellon University. See LICENSE file for terms.
22

33
#include "ir.hpp"
44
#include "util.hpp"
@@ -24,7 +24,7 @@ using namespace pharos;
2424
using namespace pharos::ir;
2525

2626
namespace {
27-
namespace IRSemantics2 = Rose::BinaryAnalysis::InstructionSemantics2;
27+
namespace IRSemantics2 = Rose::BinaryAnalysis::InstructionSemantics;
2828

2929
using SymValue = IRSemantics2::SymbolicSemantics::SValue;
3030
using SymValuePtr = IRSemantics2::SymbolicSemantics::SValuePtr;
@@ -54,13 +54,13 @@ using SymState = IRSemantics2::SymbolicSemantics::State;
5454
using SymStatePtr = IRSemantics2::SymbolicSemantics::StatePtr;
5555
using SymRiscOperators = IRSemantics2::SymbolicSemantics::RiscOperators;
5656
using SymRiscOperatorsPtr = IRSemantics2::SymbolicSemantics::RiscOperatorsPtr;
57-
using BaseDispatcher = Rose::BinaryAnalysis::InstructionSemantics2::BaseSemantics::Dispatcher;
58-
using BaseDispatcherPtr = Rose::BinaryAnalysis::InstructionSemantics2::BaseSemantics::DispatcherPtr;
57+
using BaseDispatcher = Rose::BinaryAnalysis::InstructionSemantics::BaseSemantics::Dispatcher;
58+
using BaseDispatcherPtr = Rose::BinaryAnalysis::InstructionSemantics::BaseSemantics::DispatcherPtr;
5959

6060
using IRRegisterStatePtr = boost::shared_ptr<class IRRegisterState>;
6161
using IRRiscOperatorsPtr = boost::shared_ptr<class IRRiscOperators>;
6262

63-
using SemanticsException = Rose::BinaryAnalysis::InstructionSemantics2::BaseSemantics::Exception;
63+
using SemanticsException = Rose::BinaryAnalysis::InstructionSemantics::BaseSemantics::Exception;
6464

6565
// Stub implementation
6666
bool is_register(Register r) {
@@ -1169,7 +1169,7 @@ Register IR::get_reg(RegisterDescriptor rd) const {
11691169
GFATAL << "Unable to find register " << rd << LEND;
11701170
exit(EXIT_FAILURE);
11711171
}
1172-
Register exp = Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics::SValue::promote(reg_it->value)->get_expression()->isLeafNode();
1172+
Register exp = Rose::BinaryAnalysis::InstructionSemantics::SymbolicSemantics::SValue::promote(reg_it->value)->get_expression()->isLeafNode();
11731173
assert (exp);
11741174

11751175
return exp;

libpharos/ir.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2015-2021 Carnegie Mellon University. See LICENSE file for terms.
1+
// Copyright 2015-2022 Carnegie Mellon University. See LICENSE file for terms.
22

33
#ifndef Pharos_Ir_H
44
#define Pharos_Ir_H
@@ -20,7 +20,11 @@
2020
#include <boost/graph/graphviz.hpp>
2121

2222
#include "rose.hpp"
23+
#if PHAROS_ROSE_SYMBOLIC_EXTENSION_HACK
24+
#include <Rose/BinaryAnalysis/InstructionSemantics/SymbolicSemantics.h>
25+
#else
2326
#include <Rose/BinaryAnalysis/InstructionSemantics2/SymbolicSemantics.h>
27+
#endif
2428

2529
#include "calls.hpp"
2630
#include "cdg.hpp"
@@ -33,7 +37,7 @@ using CFG = Rose::BinaryAnalysis::ControlFlow::Graph;
3337
using CFGVertex = boost::graph_traits<CFG>::vertex_descriptor;
3438

3539
namespace SymbolicExpr = Rose::BinaryAnalysis::SymbolicExpr;
36-
namespace BaseSemantics = Rose::BinaryAnalysis::InstructionSemantics2::BaseSemantics;
40+
namespace BaseSemantics = Rose::BinaryAnalysis::InstructionSemantics::BaseSemantics;
3741
using SymbolicExprPtr = SymbolicExpr::Ptr;
3842
using SymbolicLeaf = SymbolicExpr::Leaf;
3943
using SymbolicLeafPtr = SymbolicExpr::LeafPtr;

libpharos/method.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,12 @@ bool ThisCallMethod::validate_vtable(ConstVirtualTableInstallationPtr install) {
538538
// embeds another object and the constuctor for the embeded object was inlined into
539539
// this method, we get misleading results here.
540540
if (!(vp->isEquivalentTo(leaf))) {
541-
GDEBUG << "Rejected virtual function table write for wrong object " << *vp << " != "
542-
<< *leaf << " at instruction: " << debug_instruction(insn) << LEND;
543-
return false;
541+
542+
// 2/16/22: ejs disabled this to see possibleVFTableWrites that were inlined
543+
544+
// GDEBUG << "Rejected virtual function table write for wrong object " << *vp << " != "
545+
// << *leaf << " at instruction: " << debug_instruction(insn) << LEND;
546+
//return false;
544547
}
545548
}
546549

libpharos/misc.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,36 @@ template void debug_print_expression(
11001100
std::ostream & stream, SymbolicSemantics::SValuePtr const & e);
11011101
template void debug_print_expression(SymbolicSemantics::SValuePtr const & e);
11021102

1103+
bool has_subexp (const TreeNodePtr haystack, const TreeNodePtr needle) {
1104+
1105+
using Rose::BinaryAnalysis::SymbolicExpr::VisitAction;
1106+
using Rose::BinaryAnalysis::SymbolicExpr::Visitor;
1107+
using Rose::BinaryAnalysis::SymbolicExpr::CONTINUE;
1108+
using Rose::BinaryAnalysis::SymbolicExpr::TERMINATE;
1109+
1110+
struct SubxVisitor : Visitor {
1111+
1112+
TreeNodePtr needle_;
1113+
1114+
SubxVisitor(const TreeNodePtr &needle) : needle_(needle) {}
1115+
1116+
virtual VisitAction preVisit(const TreeNodePtr &expr) override {
1117+
if (expr->isEquivalentTo(needle_)) {
1118+
return TERMINATE;
1119+
}
1120+
return CONTINUE;
1121+
}
1122+
virtual VisitAction postVisit(const TreeNodePtr&) override {
1123+
return CONTINUE;
1124+
}
1125+
};
1126+
1127+
SubxVisitor read_subx(needle);
1128+
1129+
// search if this write includes the read. If it does, then it is not fake
1130+
return (haystack->depthFirstTraversal(read_subx) == Rose::BinaryAnalysis::SymbolicExpr::TERMINATE);
1131+
}
1132+
11031133
} // namespace pharos
11041134

11051135
/* Local Variables: */

libpharos/misc.hpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2015-2021 Carnegie Mellon University. See LICENSE file for terms.
1+
// Copyright 2015-2022 Carnegie Mellon University. See LICENSE file for terms.
22

33
#ifndef Pharos_Misc_H
44
#define Pharos_Misc_H
@@ -8,19 +8,29 @@
88
#include "rose.hpp"
99
// For TreeNodePtr, LeafNodePtr, etc.
1010
#include <Rose/BinaryAnalysis/SymbolicExpr.h>
11-
// For Semantics2 namespace.
11+
// For Semantics namespace.
12+
#if PHAROS_ROSE_SYMBOLIC_EXTENSION_HACK
13+
#include <Rose/BinaryAnalysis/InstructionSemantics/SymbolicSemantics.h>
14+
#else
1215
#include <Rose/BinaryAnalysis/InstructionSemantics2/SymbolicSemantics.h>
16+
#endif
1317
// For P2 namespace.
1418
#include <Rose/BinaryAnalysis/Partitioner2/Partitioner.h>
1519
#include <Rose/BinaryAnalysis/MemoryMap.h>
1620

1721
#include <numeric>
1822

23+
#if !PHAROS_ROSE_SYMBOLIC_EXTENSION_HACK
24+
namespace Rose{ namespace BinaryAnalysis {
25+
namespace InstructionSemantics = Rose::BinaryAnalysis::InstructionSemantics2;
26+
}}
27+
#endif
28+
1929
namespace pharos {
2030

2131
using Rose::BinaryAnalysis::MemoryMap;
2232
namespace P2 = Rose::BinaryAnalysis::Partitioner2;
23-
namespace Semantics2 = Rose::BinaryAnalysis::InstructionSemantics2;
33+
namespace Semantics2 = Rose::BinaryAnalysis::InstructionSemantics;
2434
namespace SymbolicSemantics = Semantics2::SymbolicSemantics;
2535

2636
// Make sure overload resolution for operator<< can see into the global namespace
@@ -108,7 +118,7 @@ std::string addr_str(rose_addr_t addr);
108118
rose_addr_t address_from_node(LeafNodePtr tnp);
109119

110120
// Make the ROSE Semantics2 namespace a little shorter to type...
111-
namespace Semantics2 = Rose::BinaryAnalysis::InstructionSemantics2;
121+
namespace Semantics2 = Rose::BinaryAnalysis::InstructionSemantics;
112122

113123
// Expression printers (very useful from a debugger)
114124
void print_expression(std::ostream & stream, TreeNode & e);
@@ -178,6 +188,8 @@ using Rose::BinaryAnalysis::RegisterDictionary;
178188

179189
void set_glog_name(std::string const & name);
180190

191+
bool has_subexp (const TreeNodePtr haystack, const TreeNodePtr needle);
192+
181193
} // namespace pharos
182194

183195
// The main program need to provide a global logging facility.

0 commit comments

Comments
 (0)