Skip to content

Commit 1424fb8

Browse files
neboattarunprabhu
authored andcommitted
[CodeGenPGO] Ensure that PGO correctly instruments cilk_for loops with atomic instrumentation. Fixes llvm#30
1 parent 7d0926e commit 1424fb8

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

clang/lib/CodeGen/CodeGenPGO.cpp

+53
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ class PGOHash {
126126
BinaryOperatorNE,
127127
// The preceding values are available since PGO_HASH_V2.
128128

129+
// Cilk statements. These values are also available with PGO_HASH_V1.
130+
CilkForStmt,
131+
129132
// Keep this last. It's for the static assert that follows.
130133
LastHashType
131134
};
@@ -267,6 +270,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
267270
DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
268271
DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
269272
DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
273+
DEFINE_NESTABLE_TRAVERSAL(CilkForStmt)
270274

271275
/// Get version \p HashVersion of the PGO hash for \p S.
272276
PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
@@ -327,6 +331,8 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
327331
}
328332
break;
329333
}
334+
case Stmt::CilkForStmtClass:
335+
return PGOHash::CilkForStmt;
330336
}
331337

332338
if (HashVersion >= PGO_HASH_V2) {
@@ -744,6 +750,53 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
744750
setCount(ParentCount + RHSCount - CurrentCount);
745751
RecordNextStmtCount = true;
746752
}
753+
754+
void VisitCilkForStmt(const CilkForStmt *S) {
755+
RecordStmtCount(S);
756+
if (S->getInit())
757+
Visit(S->getInit());
758+
if (S->getLimitStmt())
759+
Visit(S->getLimitStmt());
760+
if (S->getBeginStmt())
761+
Visit(S->getBeginStmt());
762+
if (S->getEndStmt())
763+
Visit(S->getEndStmt());
764+
if (S->getLoopVarDecl())
765+
Visit(S->getLoopVarDecl());
766+
767+
uint64_t ParentCount = CurrentCount;
768+
769+
BreakContinueStack.push_back(BreakContinue());
770+
// Visit the body region first. (This is basically the same as a while
771+
// loop; see further comments in VisitWhileStmt.)
772+
uint64_t BodyCount = setCount(PGO.getRegionCount(S));
773+
CountMap[S->getBody()] = BodyCount;
774+
Visit(S->getBody());
775+
uint64_t BackedgeCount = CurrentCount;
776+
BreakContinue BC = BreakContinueStack.pop_back_val();
777+
778+
// The increment is essentially part of the body but it needs to include
779+
// the count for all the continue statements.
780+
if (S->getInc()) {
781+
uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
782+
CountMap[S->getInc()] = IncCount;
783+
Visit(S->getInc());
784+
}
785+
786+
// ...then go back and propagate counts through the condition.
787+
uint64_t CondCount =
788+
setCount(ParentCount + BackedgeCount + BC.ContinueCount);
789+
if (S->getInitCond()) {
790+
CountMap[S->getInitCond()] = ParentCount;
791+
Visit(S->getInitCond());
792+
}
793+
if (S->getCond()) {
794+
CountMap[S->getCond()] = CondCount;
795+
Visit(S->getCond());
796+
}
797+
setCount(BC.BreakCount + CondCount - BodyCount);
798+
RecordNextStmtCount = true;
799+
}
747800
};
748801
} // end anonymous namespace
749802

clang/test/Cilk/cilkfor-pgo.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Check that -fprofile-instrument generates atomic
2+
// instrumentation instructions inside of _Cilk_for loops.
3+
//
4+
// Credit to Brian Wheatman for the original source of this test.
5+
//
6+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fprofile-instrument=clang -fprofile-update=atomic %s -S -emit-llvm -fopencilk -ftapir=none -o - 2>&1 | FileCheck %s
7+
// expected-no-diagnostics
8+
9+
int main() {
10+
int sum = 0;
11+
_Cilk_for(int i = 0; i < 1000000; i++) { sum += i; }
12+
13+
return sum;
14+
}
15+
16+
// CHECK: @__profc_main = {{.*}}global [2 x i64] zeroinitializer, section "__llvm_prf_cnts"
17+
18+
// CHECK-LABEL: define {{.*}}i32 @main()
19+
20+
// CHECK: detach within %{{.+}}, label %[[PFOR_BODY:.+]], label %[[PFOR_INC:.+]]
21+
22+
// CHECK: [[PFOR_BODY]]:
23+
// CHECK: atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_main, i64 0, i64 1), i64 1 monotonic
24+
// CHECK: reattach within %{{.+}}, label %[[PFOR_INC]]

0 commit comments

Comments
 (0)