20
20
//
21
21
// ===----------------------------------------------------------------------===//
22
22
23
- #include " llvm/Transforms/Scalar.h"
24
23
#include " llvm/ADT/DenseMap.h"
25
24
#include " llvm/ADT/PostOrderIterator.h"
26
25
#include " llvm/ADT/STLExtras.h"
27
26
#include " llvm/ADT/SetVector.h"
27
+ #include " llvm/ADT/SmallSet.h"
28
28
#include " llvm/ADT/Statistic.h"
29
29
#include " llvm/IR/CFG.h"
30
30
#include " llvm/IR/Constants.h"
37
37
#include " llvm/Pass.h"
38
38
#include " llvm/Support/Debug.h"
39
39
#include " llvm/Support/raw_ostream.h"
40
+ #include " llvm/Transforms/Scalar.h"
40
41
#include " llvm/Transforms/Utils/Local.h"
41
42
#include < algorithm>
42
43
using namespace llvm ;
@@ -161,6 +162,13 @@ namespace {
161
162
DenseMap<BasicBlock*, unsigned > RankMap;
162
163
DenseMap<AssertingVH<Value>, unsigned > ValueRankMap;
163
164
SetVector<AssertingVH<Instruction> > RedoInsts;
165
+
166
+ // Arbitrary, but prevents quadratic behavior.
167
+ static const unsigned GlobalReassociateLimit = 10 ;
168
+ static const unsigned NumBinaryOps =
169
+ Instruction::BinaryOpsEnd - Instruction::BinaryOpsBegin;
170
+ DenseMap<std::pair<Value *, Value *>, unsigned > PairMap[NumBinaryOps];
171
+
164
172
bool MadeChange;
165
173
public:
166
174
static char ID; // Pass identification, replacement for typeid
@@ -196,6 +204,7 @@ namespace {
196
204
void EraseInst (Instruction *I);
197
205
void OptimizeInst (Instruction *I);
198
206
Instruction *canonicalizeNegConstExpr (Instruction *I);
207
+ void BuildPairMap (ReversePostOrderTraversal<Function *> &RPOT);
199
208
};
200
209
}
201
210
@@ -2234,18 +2243,127 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
2234
2243
return ;
2235
2244
}
2236
2245
2246
+ if (Ops.size () > 2 && Ops.size () <= GlobalReassociateLimit) {
2247
+ // Find the pair with the highest count in the pairmap and move it to the
2248
+ // back of the list so that it can later be CSE'd.
2249
+ // example:
2250
+ // a*b*c*d*e
2251
+ // if c*e is the most "popular" pair, we can express this as
2252
+ // (((c*e)*d)*b)*a
2253
+ unsigned Max = 1 ;
2254
+ unsigned BestRank = 0 ;
2255
+ std::pair<unsigned , unsigned > BestPair;
2256
+ unsigned Idx = I->getOpcode () - Instruction::BinaryOpsBegin;
2257
+ for (unsigned i = 0 ; i < Ops.size () - 1 ; ++i)
2258
+ for (unsigned j = i + 1 ; j < Ops.size (); ++j) {
2259
+ unsigned Score = 0 ;
2260
+ Value *Op0 = Ops[i].Op ;
2261
+ Value *Op1 = Ops[j].Op ;
2262
+ if (std::less<Value *>()(Op1, Op0))
2263
+ std::swap (Op0, Op1);
2264
+ auto it = PairMap[Idx].find ({Op0, Op1});
2265
+ if (it != PairMap[Idx].end ())
2266
+ Score += it->second ;
2267
+
2268
+ unsigned MaxRank = std::max (Ops[i].Rank , Ops[j].Rank );
2269
+ if (Score > Max || (Score == Max && MaxRank < BestRank)) {
2270
+ BestPair = {i, j};
2271
+ Max = Score;
2272
+ BestRank = MaxRank;
2273
+ }
2274
+ }
2275
+ if (Max > 1 ) {
2276
+ auto Op0 = Ops[BestPair.first ];
2277
+ auto Op1 = Ops[BestPair.second ];
2278
+ Ops.erase (&Ops[BestPair.second ]);
2279
+ Ops.erase (&Ops[BestPair.first ]);
2280
+ Ops.push_back (Op0);
2281
+ Ops.push_back (Op1);
2282
+ }
2283
+ }
2237
2284
// Now that we ordered and optimized the expressions, splat them back into
2238
2285
// the expression tree, removing any unneeded nodes.
2239
2286
RewriteExprTree (I, Ops);
2240
2287
}
2241
2288
2289
+ void Reassociate::BuildPairMap (ReversePostOrderTraversal<Function *> &RPOT) {
2290
+ // Make a "pairmap" of how often each operand pair occurs.
2291
+ for (BasicBlock *BI : RPOT) {
2292
+ for (Instruction &I : *BI) {
2293
+ if (!I.isAssociative ())
2294
+ continue ;
2295
+
2296
+ // Ignore nodes that aren't at the root of trees.
2297
+ if (I.hasOneUse () && I.user_back ()->getOpcode () == I.getOpcode ())
2298
+ continue ;
2299
+
2300
+ // Collect all operands in a single reassociable expression.
2301
+ // Since Reassociate has already been run once, we can assume things
2302
+ // are already canonical according to Reassociation's regime.
2303
+ SmallVector<Value *, 8 > Worklist = {I.getOperand (0 ), I.getOperand (1 )};
2304
+ SmallVector<Value *, 8 > Ops;
2305
+ while (!Worklist.empty () && Ops.size () <= GlobalReassociateLimit) {
2306
+ Value *Op = Worklist.pop_back_val ();
2307
+ Instruction *OpI = dyn_cast<Instruction>(Op);
2308
+ if (!OpI || OpI->getOpcode () != I.getOpcode () || !OpI->hasOneUse ()) {
2309
+ Ops.push_back (Op);
2310
+ continue ;
2311
+ }
2312
+ // Be paranoid about self-referencing expressions in unreachable code.
2313
+ if (OpI->getOperand (0 ) != OpI)
2314
+ Worklist.push_back (OpI->getOperand (0 ));
2315
+ if (OpI->getOperand (1 ) != OpI)
2316
+ Worklist.push_back (OpI->getOperand (1 ));
2317
+ }
2318
+ // Skip extremely long expressions.
2319
+ if (Ops.size () > GlobalReassociateLimit)
2320
+ continue ;
2321
+
2322
+ // Add all pairwise combinations of operands to the pair map.
2323
+ unsigned BinaryIdx = I.getOpcode () - Instruction::BinaryOpsBegin;
2324
+ SmallSet<std::pair<Value *, Value *>, 32 > Visited;
2325
+ for (unsigned i = 0 ; i < Ops.size () - 1 ; ++i) {
2326
+ for (unsigned j = i + 1 ; j < Ops.size (); ++j) {
2327
+ // Canonicalize operand orderings.
2328
+ Value *Op0 = Ops[i];
2329
+ Value *Op1 = Ops[j];
2330
+ if (std::less<Value *>()(Op1, Op0))
2331
+ std::swap (Op0, Op1);
2332
+ if (!Visited.insert ({Op0, Op1}).second )
2333
+ continue ;
2334
+ auto res = PairMap[BinaryIdx].insert ({{Op0, Op1}, 1 });
2335
+ if (!res.second )
2336
+ ++res.first ->second ;
2337
+ }
2338
+ }
2339
+ }
2340
+ }
2341
+ }
2342
+
2242
2343
bool Reassociate::runOnFunction (Function &F) {
2243
2344
if (skipOptnoneFunction (F))
2244
2345
return false ;
2245
2346
2246
2347
// Calculate the rank map for F
2247
2348
BuildRankMap (F);
2248
2349
2350
+ // Build the pair map before running reassociate.
2351
+ // Technically this would be more accurate if we did it after one round
2352
+ // of reassociation, but in practice it doesn't seem to help much on
2353
+ // real-world code, so don't waste the compile time running reassociate
2354
+ // twice.
2355
// If a user wants, they could explicitly run reassociate twice in their
2356
+ // pass pipeline for further potential gains.
2357
+ // It might also be possible to update the pair map during runtime, but the
2358
// overhead of that may be large if there are many reassociable chains.
2359
+ // TODO: RPOT
2360
// Get the function's basic blocks in Reverse Post Order. This order is used by
2361
+ // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2362
+ // blocks (it has been seen that the analysis in this pass could hang when
2363
+ // analysing dead basic blocks).
2364
+ ReversePostOrderTraversal<Function *> RPOT (&F);
2365
+ BuildPairMap (RPOT);
2366
+
2249
2367
MadeChange = false ;
2250
2368
for (Function::iterator BI = F.begin (), BE = F.end (); BI != BE; ++BI) {
2251
2369
// Optimize every instruction in the basic block.
@@ -2268,9 +2386,11 @@ bool Reassociate::runOnFunction(Function &F) {
2268
2386
}
2269
2387
}
2270
2388
2271
- // We are done with the rank map.
2389
+ // We are done with the rank map and pair map .
2272
2390
RankMap.clear ();
2273
2391
ValueRankMap.clear ();
2392
+ for (auto &Entry : PairMap)
2393
+ Entry.clear ();
2274
2394
2275
2395
return MadeChange;
2276
2396
}
0 commit comments