@@ -1386,6 +1386,7 @@ void Optimizer::initOptimizations()
13861386 INITIALIZE_PASS(preRA_HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
13871387 INITIALIZE_PASS(regAlloc, vISA_EnableAlways, TimerID::TOTAL_RA);
13881388 INITIALIZE_PASS(removeLifetimeOps, vISA_EnableAlways, TimerID::MISC_OPTS);
1389+ INITIALIZE_PASS(postRA_HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
13891390 INITIALIZE_PASS(countBankConflicts, vISA_OptReport, TimerID::MISC_OPTS);
13901391 INITIALIZE_PASS(removeRedundMov, vISA_EnableAlways, TimerID::MISC_OPTS);
13911392 INITIALIZE_PASS(removeEmptyBlocks, vISA_EnableAlways, TimerID::MISC_OPTS);
@@ -1930,7 +1931,7 @@ int Optimizer::optimization()
19301931 // PreRA scheduling
19311932 runPass(PI_preRA_Schedule);
19321933
1933- // HW workaround before RA (assume no pseudo inst)
1934+ // HW workaround before RA
19341935 runPass(PI_preRA_HWWorkaround);
19351936
19361937 if (builder.enableACCBeforRA())
@@ -1951,6 +1952,9 @@ int Optimizer::optimization()
19511952
19521953 runPass(PI_removeLifetimeOps);
19531954
1955+ // HW workaround after RA
1956+ runPass(PI_postRA_HWWorkaround);
1957+
19541958 runPass(PI_countBankConflicts);
19551959
19561960 //
@@ -7401,16 +7405,6 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
74017405 // HW WAs that are done before RA.
74027406 void Optimizer::preRA_HWWorkaround()
74037407 {
7404- // -forceNoMaskWA : to force running this WA pass on platform other than TGLLP.
7405- // noMaskWA: only apply on TGLLP
7406- // bit[1:0]: 0 - off
7407- // 1 - on, replacing nomask in any divergent BB (conservative)
7408- // 2 - on, replacing nomask in nested divergent BB (aggressive)
7409- // 3 - not used, will behave the same as 2
7410- // bit[2]: 0 - optimized. "emask flag" is created once per each BB
7411- // 1 - simple insertion of "emask flag". A new flag is created
7412- // each time it is needed, that is, created per each inst.
7413- // (See comments for more details at doNoMaskWA().
74147408 if (builder.useNewNoMaskWA())
74157409 {
74167410 if (builder.hasFusedEUNoMaskWA())
@@ -7438,6 +7432,35 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
74387432 cloneSampleInst();
74397433 }
74407434
7435+ //
7436+ // HW WAs that are done right after RA.
7437+ // Sometimes a WA needs both a preRA part and a postRA part, and the postRA part needs info from preRA (NoMask WA).
7438+ // If the postRA part were done in HWWorkaround, some instructions, or even basic blocks (ifcvt), would already
7439+ // have been removed, which could interfere with passing information from preRA to postRA. Losing that information
7440+ // can cause the postRA WA to fail. For this purpose, postRA_HWWorkaround is added. This also means
7441+ // that BBs and insts between the preRA pass and the postRA pass must remain undeleted (is it too strong?).
7442+ //
7443+ // Note that for those WAs that should be done after inst scheduling, they should go to
7444+ // HWWorkaround, not here, in order to prevent the scheduling from invalidating WAs.
7445+ //
7446+ void Optimizer::postRA_HWWorkaround()
7447+ {
7448+ if (builder.useNewNoMaskWA()) // choose between the "new" and the non-new NoMask WA implementation
7449+ {
7450+ if (builder.hasFusedEUNoMaskWA()) // both paths run only when builder.hasFusedEUNoMaskWA() holds
7451+ {
7452+ newDoNoMaskWA_postRA(); // postRA step of the new NoMask WA
7453+ }
7454+ }
7455+ else
7456+ {
7457+ if (builder.hasFusedEUNoMaskWA()) // same gate as the new-WA path above
7458+ {
7459+ doNoMaskWA_postRA(); // postRA step of the non-new NoMask WA
7460+ }
7461+ }
7462+ }
7463+
74417464 G4_INST* Optimizer::evenlySplitDPASInst(INST_LIST_ITER iter, G4_BB* bb)
74427465 {
74437466 auto* inst = *iter;
@@ -7706,21 +7729,6 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
77067729 // some workaround for HW restrictions. We apply them here so as not to affect optimizations, RA, and scheduling
77077730 void Optimizer::HWWorkaround()
77087731 {
7709- if (builder.useNewNoMaskWA())
7710- {
7711- if (builder.hasFusedEUNoMaskWA())
7712- {
7713- newDoNoMaskWA_postRA();
7714- }
7715- }
7716- else
7717- {
7718- if (builder.hasFusedEUNoMaskWA())
7719- {
7720- doNoMaskWA_postRA();
7721- }
7722- }
7723-
77247732 // Ensure the first instruction of a stack function has switch option.
77257733 if (fg.getIsStackCallFunc() &&
77267734 VISA_WA_CHECK(builder.getPWaTable(), WaThreadSwitchAfterCall))
@@ -11044,10 +11052,6 @@ void Optimizer::lowerMadSequence()
1104411052
1104511053void Optimizer::ifCvt()
1104611054{
11047- // New NoMask WA requires the BB not merged. Conservatively turn ifcvt off.
11048- if (builder.useNewNoMaskWA() && builder.hasFusedEUNoMaskWA())
11049- return;
11050-
1105111055 runIfCvt(fg);
1105211056}
1105311057
0 commit comments