Skip to content

Commit 83b7b96

Browse files
committed
Add a flag for the upstream global reassociation algorithm change
This PR (#6598) pulls the upstream global reassociation algorithm change into DXC, which noticeably reduces redundant calculations. However, testing against a large offline suite of shaders showed that some shaders got worse compilation results and did not benefit from the upstream change. This PR adds a flag for the upstream global reassociation change, making it easier to roll back if a shader gets worse compilation results because of it. This is part 2 of the fix for #6593.
1 parent 0c42a77 commit 83b7b96

File tree

7 files changed

+59
-2
lines changed

7 files changed

+59
-2
lines changed

include/dxc/Support/DxcOptToggles.h

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ enum {
3838
static const Toggle TOGGLE_GVN = {"gvn", DEFAULT_ON};
3939
static const Toggle TOGGLE_LICM = {"licm", DEFAULT_ON};
4040
static const Toggle TOGGLE_SINK = {"sink", DEFAULT_ON};
41+
static const Toggle TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION = {
42+
"enable-aggressive-reassociation", DEFAULT_ON};
4143
static const Toggle TOGGLE_LIFETIME_MARKERS = {"lifetime-markers", DEFAULT_ON};
4244
static const Toggle TOGGLE_PARTIAL_LIFETIME_MARKERS = {
4345
"partial-lifetime-markers", DEFAULT_OFF};

include/llvm/Transforms/IPO/PassManagerBuilder.h

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ class PassManagerBuilder {
134134
unsigned ScanLimit = 0; // HLSL Change
135135
bool EnableGVN = true; // HLSL Change
136136
bool StructurizeLoopExitsForUnroll = false; // HLSL Change
137+
bool HLSLEnableAggressiveReassociation = true; // HLSL Change
137138
bool HLSLEnableLifetimeMarkers = false; // HLSL Change
138139
bool HLSLEnablePartialLifetimeMarkers = false; // HLSL Change
139140
bool HLSLEnableDebugNops = false; // HLSL Change

include/llvm/Transforms/Scalar.h

+2
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ extern char &DemoteRegisterToMemoryHlslID;
331331
// For example: 4 + (x + 5) -> x + (4 + 5)
332332
//
333333
FunctionPass *createReassociatePass();
334+
FunctionPass *
335+
createReassociatePass(bool HLSLEnableAggressiveReassociation); // HLSL Change
334336

335337
//===----------------------------------------------------------------------===//
336338
//

lib/Transforms/IPO/PassManagerBuilder.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ void PassManagerBuilder::populateModulePassManager(
469469
//MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
470470
// HLSL Change Ends.
471471
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
472-
MPM.add(createReassociatePass()); // Reassociate expressions
472+
MPM.add(createReassociatePass(
473+
HLSLEnableAggressiveReassociation)); // Reassociate expressions
473474
// Rotate Loop - disable header duplication at -Oz
474475
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
475476
// HLSL Change - disable LICM in frontend for not consider register pressure.

lib/Transforms/Scalar/Reassociate.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,16 @@ namespace {
176176
initializeReassociatePass(*PassRegistry::getPassRegistry());
177177
}
178178

179+
// HLSL Change - begin
180+
// Enable global reassociation when HLSLEnableAggressiveReassociation is
181+
// set
182+
bool m_HLSLEnableAggressiveReassociation = true;
183+
Reassociate(bool HLSLEnableAggressiveReassociation) : FunctionPass(ID) {
184+
initializeReassociatePass(*PassRegistry::getPassRegistry());
185+
m_HLSLEnableAggressiveReassociation = HLSLEnableAggressiveReassociation;
186+
}
187+
// HLSL Change - end
188+
179189
bool runOnFunction(Function &F) override;
180190

181191
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -242,6 +252,13 @@ INITIALIZE_PASS(Reassociate, "reassociate",
242252
// Public interface to the Reassociate pass
243253
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
244254

255+
// HLSL Change - begin
256+
FunctionPass *
257+
llvm::createReassociatePass(bool HLSLEnableAggressiveReassociation) {
258+
return new Reassociate(HLSLEnableAggressiveReassociation);
259+
}
260+
// HLSL Change - end
261+
245262
/// Return true if V is an instruction of the specified opcode and if it
246263
/// only has one use.
247264
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
@@ -2243,7 +2260,8 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
22432260
return;
22442261
}
22452262

2246-
if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
2263+
if (m_HLSLEnableAggressiveReassociation && // HLSL Change
2264+
(Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit)) {
22472265
// Find the pair with the highest count in the pairmap and move it to the
22482266
// back of the list so that it can later be CSE'd.
22492267
// example:

tools/clang/lib/CodeGen/BackendUtil.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,8 @@ void EmitAssemblyHelper::CreatePasses() {
357357
OptToggles.IsEnabled(hlsl::options::TOGGLE_LIFETIME_MARKERS);
358358
PMBuilder.HLSLEnablePartialLifetimeMarkers =
359359
OptToggles.IsEnabled(hlsl::options::TOGGLE_PARTIAL_LIFETIME_MARKERS);
360+
PMBuilder.HLSLEnableAggressiveReassociation =
361+
OptToggles.IsEnabled(hlsl::options::TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION);
360362
// HLSL Change - end
361363

362364
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-enable enable-aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
2+
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-disable enable-aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR
3+
4+
// Make sure DXC recognizes the common factor and generates optimized DXIL when enable-aggressive-reassociation is enabled.
5+
6+
// CHECK: [[FACTOR_SRC1:%.*]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
7+
// CHECK: [[FACTOR_SRC0:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
8+
9+
// COMMON_FACTOR: [[FACTOR:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
10+
// COMMON_FACTOR: mul i32 [[FACTOR]],
11+
// COMMON_FACTOR: mul i32 [[FACTOR]],
12+
13+
// NO_COMMON_FACTOR: [[EXPRESSION_0:%.*]] = mul i32 [[FACTOR_SRC1]],
14+
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_0]], [[FACTOR_SRC0]]
15+
// NO_COMMON_FACTOR: [[EXPRESSION_1:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
16+
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_1]],
17+
18+
19+
RWTexture1D < float2 > outColorBuffer : register ( u0 ) ;
20+
21+
[ numthreads ( 8 , 8 , 1 ) ]
22+
void cs_main ( uint3 GroupID : SV_GroupID , uint GroupIndex : SV_GroupIndex , uint3 GTID : SV_GroupThreadID , uint3 DispatchThreadID : SV_DispatchThreadID )
23+
{
24+
// DXC should recognize that (GroupIndex * GTID.x) is a common factor
25+
uint a = GroupIndex * GroupID.x;
26+
uint b = GroupIndex * DispatchThreadID.x;
27+
uint c = a * GTID.x;
28+
uint d = b * GTID.x;
29+
30+
outColorBuffer [ DispatchThreadID.y ] = float2(c, d);
31+
}

0 commit comments

Comments
 (0)