Skip to content

Commit 3471793

Browse files
jgu222igcbot
authored and committed
Move postRA noMask WA right after RA
To prevent insts/BBs from being deleted, move postRA noMask WA into a new pass: postRA_HWWorkaround, which is right after RA.
1 parent 60e99b7 commit 3471793

File tree

2 files changed

+36
-30
lines changed

2 files changed

+36
-30
lines changed

visa/Optimizer.cpp

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,6 +1386,7 @@ void Optimizer::initOptimizations()
13861386
INITIALIZE_PASS(preRA_HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
13871387
INITIALIZE_PASS(regAlloc, vISA_EnableAlways, TimerID::TOTAL_RA);
13881388
INITIALIZE_PASS(removeLifetimeOps, vISA_EnableAlways, TimerID::MISC_OPTS);
1389+
INITIALIZE_PASS(postRA_HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
13891390
INITIALIZE_PASS(countBankConflicts, vISA_OptReport, TimerID::MISC_OPTS);
13901391
INITIALIZE_PASS(removeRedundMov, vISA_EnableAlways, TimerID::MISC_OPTS);
13911392
INITIALIZE_PASS(removeEmptyBlocks, vISA_EnableAlways, TimerID::MISC_OPTS);
@@ -1930,7 +1931,7 @@ int Optimizer::optimization()
19301931
// PreRA scheduling
19311932
runPass(PI_preRA_Schedule);
19321933

1933-
// HW workaround before RA (assume no pseudo inst)
1934+
// HW workaround before RA
19341935
runPass(PI_preRA_HWWorkaround);
19351936

19361937
if (builder.enableACCBeforRA())
@@ -1951,6 +1952,9 @@ int Optimizer::optimization()
19511952

19521953
runPass(PI_removeLifetimeOps);
19531954

1955+
// HW workaround after RA
1956+
runPass(PI_postRA_HWWorkaround);
1957+
19541958
runPass(PI_countBankConflicts);
19551959

19561960
//
@@ -7401,16 +7405,6 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
74017405
// HW WAs that are done before RA.
74027406
void Optimizer::preRA_HWWorkaround()
74037407
{
7404-
// -forceNoMaskWA : to force running this WA pass on platform other than TGLLP.
7405-
// noMaskWA: only apply on TGLLP
7406-
// bit[1:0]: 0 - off
7407-
// 1 - on, replacing nomask in any divergent BB (conservative)
7408-
// 2 - on, replacing nomask in nested divergent BB (aggressive)
7409-
// 3 - not used, will behave the same as 2
7410-
// bit[2]: 0 - optimized. "emask flag" is created once per each BB
7411-
// 1 - simple insertion of "emask flag". A new flag is created
7412-
// each time it is needed, that is, created per each inst.
7413-
// (See comments for more details at doNoMaskWA().
74147408
if (builder.useNewNoMaskWA())
74157409
{
74167410
if (builder.hasFusedEUNoMaskWA())
@@ -7438,6 +7432,35 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
74387432
cloneSampleInst();
74397433
}
74407434

7435+
//
7436+
// HW WAs that are done right after RA.
7437+
// Sometimes a WA needs both a preRA part and a postRA part, and the postRA part needs info from the preRA part (e.g., NoMask WA).
7438+
// If the postRA WA were done in HWWorkaround, some instructions, or even basic blocks (ifcvt), could already have been removed,
7439+
// which could interfere with passing information from preRA to postRA. Such interference
7440+
// can cause postRA WA to fail. For this purpose, a postRA_HWWorkaround is added. This also means
7441+
// that BBs and insts between the preRA pass and the postRA pass remain undeleted (is this too strong?).
7442+
//
7443+
// Note that WAs that should be done after inst scheduling should go to
7444+
// HWWorkaround, not here, in order to prevent the scheduling from invalidating WAs.
7445+
//
7446+
void Optimizer::postRA_HWWorkaround()
7447+
{
7448+
if (builder.useNewNoMaskWA())
7449+
{
7450+
if (builder.hasFusedEUNoMaskWA())
7451+
{
7452+
newDoNoMaskWA_postRA();
7453+
}
7454+
}
7455+
else
7456+
{
7457+
if (builder.hasFusedEUNoMaskWA())
7458+
{
7459+
doNoMaskWA_postRA();
7460+
}
7461+
}
7462+
}
7463+
74417464
G4_INST* Optimizer::evenlySplitDPASInst(INST_LIST_ITER iter, G4_BB* bb)
74427465
{
74437466
auto* inst = *iter;
@@ -7706,21 +7729,6 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
77067729
// Some workarounds for HW restrictions. We apply them here so as not to affect optimizations, RA, and scheduling.
77077730
void Optimizer::HWWorkaround()
77087731
{
7709-
if (builder.useNewNoMaskWA())
7710-
{
7711-
if (builder.hasFusedEUNoMaskWA())
7712-
{
7713-
newDoNoMaskWA_postRA();
7714-
}
7715-
}
7716-
else
7717-
{
7718-
if (builder.hasFusedEUNoMaskWA())
7719-
{
7720-
doNoMaskWA_postRA();
7721-
}
7722-
}
7723-
77247732
// Ensure the first instruction of a stack function has switch option.
77257733
if (fg.getIsStackCallFunc() &&
77267734
VISA_WA_CHECK(builder.getPWaTable(), WaThreadSwitchAfterCall))
@@ -11044,10 +11052,6 @@ void Optimizer::lowerMadSequence()
1104411052

1104511053
void Optimizer::ifCvt()
1104611054
{
11047-
// New NoMask WA requires the BB not merged. Conservatively turn ifcvt off.
11048-
if (builder.useNewNoMaskWA() && builder.hasFusedEUNoMaskWA())
11049-
return;
11050-
1105111055
runIfCvt(fg);
1105211056
}
1105311057

visa/Optimizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ class Optimizer
152152
bool foldCmpToCondMod(G4_BB* BB, INST_LIST_ITER& iter);
153153
void HWWorkaround();
154154
void preRA_HWWorkaround();
155+
void postRA_HWWorkaround();
155156
G4_INST* evenlySplitDPASInst(INST_LIST_ITER iter, G4_BB* bb);
156157
bool hasDPASSourceTwoReuse(DPASSrc2RSCache* src2GRFCache, G4_INST* inst);
157158
void DPASWA(G4_BB* bb, INST_LIST_ITER ii, DPASSrc2RSCache* src2GRFCache);
@@ -355,6 +356,7 @@ class Optimizer
355356
PI_optimizeLogicOperation,
356357
PI_HWConformityChk, // always
357358
PI_preRA_HWWorkaround, // always, each WA under specific control
359+
PI_postRA_HWWorkaround, // always, each WA under specific control
358360
PI_preRA_Schedule,
359361
PI_regAlloc, // always
360362
PI_removeLifetimeOps, // always

0 commit comments

Comments
 (0)