Skip to content

Commit 1ee70fd

Browse files
authored
Add a flag for the upstream global reassociation algorithm change (#6625)
PR #6598 pulled the upstream global reassociation algorithm change into DXC, which can noticeably reduce redundant calculations. However, testing against a large offline suite of shaders showed that some shaders got worse compilation results and did not benefit from the upstream change. This PR adds a flag for the upstream global reassociation change, making it easier to roll back if a shader gets worse compilation results because of it. This is part 2 of the fix for #6593.
1 parent 6a34e29 commit 1ee70fd

File tree

9 files changed

+119
-21
lines changed

9 files changed

+119
-21
lines changed

include/dxc/Support/DxcOptToggles.h

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ enum {
3838
static const Toggle TOGGLE_GVN = {"gvn", DEFAULT_ON};
3939
static const Toggle TOGGLE_LICM = {"licm", DEFAULT_ON};
4040
static const Toggle TOGGLE_SINK = {"sink", DEFAULT_ON};
41+
static const Toggle TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION = {
42+
"aggressive-reassociation", DEFAULT_ON};
4143
static const Toggle TOGGLE_LIFETIME_MARKERS = {"lifetime-markers", DEFAULT_ON};
4244
static const Toggle TOGGLE_PARTIAL_LIFETIME_MARKERS = {
4345
"partial-lifetime-markers", DEFAULT_OFF};

include/llvm/Transforms/IPO/PassManagerBuilder.h

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ class PassManagerBuilder {
134134
unsigned ScanLimit = 0; // HLSL Change
135135
bool EnableGVN = true; // HLSL Change
136136
bool StructurizeLoopExitsForUnroll = false; // HLSL Change
137+
bool HLSLEnableAggressiveReassociation = true; // HLSL Change
137138
bool HLSLEnableLifetimeMarkers = false; // HLSL Change
138139
bool HLSLEnablePartialLifetimeMarkers = false; // HLSL Change
139140
bool HLSLEnableDebugNops = false; // HLSL Change

include/llvm/Transforms/Scalar.h

+2
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ extern char &DemoteRegisterToMemoryHlslID;
331331
// For example: 4 + (x + 5) -> x + (4 + 5)
332332
//
333333
FunctionPass *createReassociatePass();
334+
FunctionPass *
335+
createReassociatePass(bool HLSLEnableAggressiveReassociation); // HLSL Change
334336

335337
//===----------------------------------------------------------------------===//
336338
//

lib/Transforms/IPO/PassManagerBuilder.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ void PassManagerBuilder::populateModulePassManager(
469469
//MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
470470
// HLSL Change Ends.
471471
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
472-
MPM.add(createReassociatePass()); // Reassociate expressions
472+
MPM.add(createReassociatePass(
473+
HLSLEnableAggressiveReassociation)); // Reassociate expressions
473474
// Rotate Loop - disable header duplication at -Oz
474475
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
475476
// HLSL Change - disable LICM in frontend for not consider register pressure.

lib/Transforms/Scalar/Reassociate.cpp

+47-19
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,22 @@ namespace {
176176
initializeReassociatePass(*PassRegistry::getPassRegistry());
177177
}
178178

179+
// HLSL Change - begin
180+
// Enable global reassociation when HLSLEnableAggressiveReassociation is
181+
// set
182+
bool HLSLEnableAggressiveReassociation = true;
183+
Reassociate(bool HLSLEnableAggressiveReassociation) : Reassociate() {
184+
this->HLSLEnableAggressiveReassociation =
185+
HLSLEnableAggressiveReassociation;
186+
}
187+
188+
void applyOptions(PassOptions O) override {
189+
GetPassOptionBool(O, "EnableAggressiveReassociation",
190+
&HLSLEnableAggressiveReassociation,
191+
/*defaultValue*/ true);
192+
}
193+
// HLSL Change - end
194+
179195
bool runOnFunction(Function &F) override;
180196

181197
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -242,6 +258,13 @@ INITIALIZE_PASS(Reassociate, "reassociate",
242258
// Public interface to the Reassociate pass
243259
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
244260

261+
// HLSL Change - begin
262+
FunctionPass *
263+
llvm::createReassociatePass(bool HLSLEnableAggressiveReassociation) {
264+
return new Reassociate(HLSLEnableAggressiveReassociation);
265+
}
266+
// HLSL Change - end
267+
245268
/// Return true if V is an instruction of the specified opcode and if it
246269
/// only has one use.
247270
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
@@ -2243,7 +2266,8 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
22432266
return;
22442267
}
22452268

2246-
if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2269+
if (HLSLEnableAggressiveReassociation && // HLSL Change
2270+
(Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit)) {
22472271
// Find the pair with the highest count in the pairmap and move it to the
22482272
// back of the list so that it can later be CSE'd.
22492273
// example:
@@ -2347,22 +2371,24 @@ bool Reassociate::runOnFunction(Function &F) {
23472371
// Calculate the rank map for F
23482372
BuildRankMap(F);
23492373

2350-
// Build the pair map before running reassociate.
2351-
// Technically this would be more accurate if we did it after one round
2352-
// of reassociation, but in practice it doesn't seem to help much on
2353-
// real-world code, so don't waste the compile time running reassociate
2354-
// twice.
2355-
// If a user wants, they could expicitly run reassociate twice in their
2356-
// pass pipeline for further potential gains.
2357-
// It might also be possible to update the pair map during runtime, but the
2358-
// overhead of that may be large if there's many reassociable chains.
2359-
// TODO: RPOT
2360-
// Get the functions basic blocks in Reverse Post Order. This order is used by
2361-
// BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2362-
// blocks (it has been seen that the analysis in this pass could hang when
2363-
// analysing dead basic blocks).
2364-
ReversePostOrderTraversal<Function *> RPOT(&F);
2365-
BuildPairMap(RPOT);
2374+
if (HLSLEnableAggressiveReassociation) { // HLSL Change
2375+
// Build the pair map before running reassociate.
2376+
// Technically this would be more accurate if we did it after one round
2377+
// of reassociation, but in practice it doesn't seem to help much on
2378+
// real-world code, so don't waste the compile time running reassociate
2379+
// twice.
2380+
// If a user wants, they could explicitly run reassociate twice in their
2381+
// pass pipeline for further potential gains.
2382+
// It might also be possible to update the pair map during runtime, but the
// overhead of that may be large if there are many reassociable chains.
2383+
// overhead of that may be large if there's many reassociable chains.
2384+
// TODO: RPOT
2385+
// Get the functions basic blocks in Reverse Post Order. This order is used
2386+
// by BuildRankMap to pre calculate ranks correctly. It also excludes dead
2387+
// basic blocks (it has been seen that the analysis in this pass could hang
2388+
// when analysing dead basic blocks).
2389+
ReversePostOrderTraversal<Function *> RPOT(&F);
2390+
BuildPairMap(RPOT);
2391+
} // HLSL Change
23662392

23672393
MadeChange = false;
23682394
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
@@ -2389,8 +2415,10 @@ bool Reassociate::runOnFunction(Function &F) {
23892415
// We are done with the rank map and pair map.
23902416
RankMap.clear();
23912417
ValueRankMap.clear();
2392-
for (auto &Entry : PairMap)
2393-
Entry.clear();
2418+
if (HLSLEnableAggressiveReassociation) { // HLSL Change
2419+
for (auto &Entry : PairMap)
2420+
Entry.clear();
2421+
} // HLSL Change
23942422

23952423
return MadeChange;
23962424
}

tools/clang/lib/CodeGen/BackendUtil.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ void EmitAssemblyHelper::CreatePasses() {
357357
OptToggles.IsEnabled(hlsl::options::TOGGLE_LIFETIME_MARKERS);
358358
PMBuilder.HLSLEnablePartialLifetimeMarkers =
359359
OptToggles.IsEnabled(hlsl::options::TOGGLE_PARTIAL_LIFETIME_MARKERS);
360+
PMBuilder.HLSLEnableAggressiveReassociation = OptToggles.IsEnabled(
361+
hlsl::options::TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION);
360362
// HLSL Change - end
361363

362364
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-enable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
2+
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-disable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR
3+
4+
// Make sure DXC recognizes the common factor and generates optimized DXIL when aggressive-reassociation is enabled.
5+
6+
// CHECK: [[FACTOR_SRC1:%.*]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
7+
// CHECK: [[FACTOR_SRC0:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
8+
9+
// COMMON_FACTOR: [[FACTOR:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
10+
// COMMON_FACTOR: mul i32 [[FACTOR]],
11+
// COMMON_FACTOR: mul i32 [[FACTOR]],
12+
13+
// NO_COMMON_FACTOR: [[EXPRESSION_0:%.*]] = mul i32 [[FACTOR_SRC1]],
14+
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_0]], [[FACTOR_SRC0]]
15+
// NO_COMMON_FACTOR: [[EXPRESSION_1:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
16+
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_1]],
17+
18+
19+
RWTexture1D < float2 > outColorBuffer : register ( u0 ) ;
20+
21+
[ numthreads ( 8 , 8 , 1 ) ]
22+
void cs_main ( uint3 GroupID : SV_GroupID , uint GroupIndex : SV_GroupIndex , uint3 GTID : SV_GroupThreadID , uint3 DispatchThreadID : SV_DispatchThreadID )
23+
{
24+
// DXC should recognize (GroupIndex * GTID.x) is a common factor
25+
uint a = GroupIndex * GroupID.x;
26+
uint b = GroupIndex * DispatchThreadID.x;
27+
uint c = a * GTID.x;
28+
uint d = b * GTID.x;
29+
30+
outColorBuffer [ DispatchThreadID.y ] = float2(c, d);
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=1 -gvn -S | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
2+
; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=0 -gvn -S | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR
3+
4+
; CHECK: @test1
5+
6+
; COMMON_FACTOR: %[[FACTOR:.*]] = mul i32 %X4, %X3
7+
; COMMON_FACTOR-NEXT: %[[C:.*]] = mul i32 %[[FACTOR]], %X1
8+
; COMMON_FACTOR-NEXT: %[[D:.*]] = mul i32 %[[FACTOR]], %X2
9+
10+
; NO_COMMON_FACTOR: %[[A:.*]] = mul i32 %X3, %X1
11+
; NO_COMMON_FACTOR: %[[B:.*]] = mul i32 %X3, %X2
12+
; NO_COMMON_FACTOR: %[[C:.*]] = mul i32 %[[A]], %X4
13+
; NO_COMMON_FACTOR: %[[D:.*]] = mul i32 %[[B]], %X4
14+
15+
; CHECK: %[[E:.*]] = xor i32 %[[C]], %[[D]]
16+
; CHECK: ret i32 %[[E]]
17+
define i32 @test1(i32 %X1, i32 %X2, i32 %X3, i32 %X4) {
18+
%A = mul i32 %X3, %X1
19+
%B = mul i32 %X3, %X2
20+
%C = mul i32 %A, %X4
21+
%D = mul i32 %B, %X4
22+
%E = xor i32 %C, %D
23+
ret i32 %E
24+
}

utils/hct/hctdb.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -6525,7 +6525,14 @@ def add_pass(name, type_name, doc, opts):
65256525
[],
65266526
)
65276527
# createTailCallEliminationPass is removed - but is this checked before?
6528-
add_pass("reassociate", "Reassociate", "Reassociate expressions", [])
6528+
add_pass(
6529+
"reassociate",
6530+
"Reassociate",
6531+
"Reassociate expressions",
6532+
[
6533+
{"n": "EnableAggressiveReassociation", "t": "bool", "c": 1},
6534+
],
6535+
)
65296536
add_pass(
65306537
"loop-rotate",
65316538
"LoopRotate",

0 commit comments

Comments
 (0)