Skip to content

Commit d8cc047

Browse files
bgajdaINTCigcbot
authored and committed
Refactor logic around movi.
Extract it into a new common function and use it to prevent hoisting in a few edge cases. Add experimental support for extended cases.
1 parent 3497f70 commit d8cc047

File tree

12 files changed

+307
-102
lines changed

12 files changed

+307
-102
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4842,6 +4842,11 @@ namespace IGC
48424842
SaveOption(vISA_ALTMode, true);
48434843
}
48444844

4845+
if (IGC_GET_FLAG_VALUE(EnableEmitMoreMoviCases))
4846+
{
4847+
SaveOption(vISA_emitMoreMoviCases, true);
4848+
}
4849+
48454850
//
48464851
// Setting number of GRF and threads per EU is restricted to OCL only
48474852
// Number of threads can be set by:

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5829,13 +5829,31 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
58295829
// bit set. Inactive lanes contain garbage data and may cause an
58305830
// out-of-bounds register access.
58315831
bool laneIdCanBeOOB = !m_currShader->m_DriverInfo->needsRegisterAccessBoundsChecks();
5832-
if (!m_currShader->m_Platform->supportsOutOfBoundsGrfAccess() &&
5833-
!channelUniform &&
5834-
(m_encoder->IsSubSpanDestination() || laneIdCanBeOOB))
5832+
5833+
bool defaultConditions =
5834+
(!m_currShader->m_Platform->supportsOutOfBoundsGrfAccess() &&
5835+
!channelUniform &&
5836+
(m_encoder->IsSubSpanDestination() || laneIdCanBeOOB));
5837+
5838+
// Enabling movi requires that the first lane, even when inactive, stays within the bounds of the register we want.
5839+
// It is also limited to accessing a single GRF.
5840+
// For a uniform channel, which will be simd1, there is probably no gain from movi.
5841+
bool isSingleGrf = data->GetSize() <= (unsigned)getGRFSize();
5842+
bool platformMoviTypeCheck = m_currShader->m_Platform->allowsMoviForType(data->GetType());
5843+
bool forcePreventOOB = isSingleGrf && platformMoviTypeCheck && !channelUniform;
5844+
5845+
if (defaultConditions || forcePreventOOB)
58355846
{
58365847
uint maskOfValidLanes = numLanes(m_currShader->m_State.m_dispatchSize) - 1;
58375848
m_encoder->SetSrcRegion(0, 2, 1, 0);
58385849
m_encoder->SetDstRegion(2);
5850+
5851+
// To support conversion to movi, all address calculations (shl, addr_add) must be NoMask
5852+
// to avoid random data from a previous execution in divergent CF.
5853+
if (forcePreventOOB)
5854+
{
5855+
m_encoder->SetNoMask();
5856+
}
58395857
m_encoder->And(simdChannelUW, simdChannelUW,
58405858
m_currShader->ImmToVariable(maskOfValidLanes, ISA_TYPE_UW));
58415859
m_encoder->Push();
@@ -5851,6 +5869,10 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
58515869
{
58525870
m_encoder->SetSrcRegion(0, 16, 8, 2);
58535871
}
5872+
if (forcePreventOOB)
5873+
{
5874+
m_encoder->SetNoMask();
5875+
}
58545876
m_encoder->Shl(pSrcElm, simdChannelUW,
58555877
m_currShader->ImmToVariable(shtAmt, ISA_TYPE_UW));
58565878
m_encoder->Push();
@@ -5889,6 +5911,10 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
58895911
else // !channelUniform
58905912
{
58915913
m_encoder->SetSimdSize(SIMDMode::SIMD16);
5914+
if (forcePreventOOB)
5915+
{
5916+
m_encoder->SetNoMask();
5917+
}
58925918

58935919
m_encoder->AddrAdd(pDstArrElm, src, pSrcElm);
58945920
m_encoder->Push();
@@ -5924,6 +5950,10 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
59245950
m_encoder->SetMask(EMASK_H2);
59255951
m_encoder->SetSrcSubReg(0, 16);
59265952
m_encoder->SetSrcSubReg(1, 16);
5953+
if (forcePreventOOB)
5954+
{
5955+
m_encoder->SetNoMask();
5956+
}
59275957
m_encoder->AddrAdd(pDstArrElm, src, pSrcElm);
59285958
m_encoder->Push();
59295959

@@ -5994,6 +6024,10 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
59946024
{
59956025
// also calculate the second half of address
59966026
m_encoder->SetSrcRegion(0, 16, 8, 2);
6027+
if (forcePreventOOB)
6028+
{
6029+
m_encoder->SetNoMask();
6030+
}
59976031
m_encoder->Shl(pSrcElm, simdChannelUW,
59986032
m_currShader->ImmToVariable(shtAmt, ISA_TYPE_UW));
59996033
m_encoder->Push();
@@ -6016,6 +6050,11 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
60166050
true,
60176051
m_destination->getName());
60186052

6053+
if (forcePreventOOB)
6054+
{
6055+
m_encoder->SetNoMask();
6056+
}
6057+
60196058
m_encoder->AddrAdd(pDstArrElm, src, pSrcElm);
60206059
m_encoder->Push();
60216060

@@ -6025,6 +6064,10 @@ void EmitPass::emitSimdShuffle(llvm::Instruction* inst)
60256064
if (isSimd32)
60266065
{
60276066
m_encoder->SetSecondHalf(true);
6067+
if (forcePreventOOB)
6068+
{
6069+
m_encoder->SetNoMask();
6070+
}
60286071
m_encoder->AddrAdd(pDstArrElm, src, pSrcElm);
60296072
m_encoder->Push();
60306073
m_encoder->Copy(m_destination, pDstArrElm);

IGC/Compiler/CISACodeGen/Platform.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,5 +1910,9 @@ bool allowProceedBasedApproachForRayQueryDynamicRayManagementMechanism() const
19101910
return IGC_IS_FLAG_DISABLED(DisableProceedBasedApproachForRayQueryDynamicRayManagementMechanism);
19111911
}
19121912

1913+
// Returns true only when the EnableEmitMoreMoviCases flag value is set and
// `type` is ISA_TYPE_UD or ISA_TYPE_D; returns false for every other type or
// when the flag is off.
bool allowsMoviForType(VISA_Type type) const {
1914+
return IGC_GET_FLAG_VALUE(EnableEmitMoreMoviCases) && (type == ISA_TYPE_UD || type == ISA_TYPE_D);
1915+
}
1916+
19131917
};
19141918
}//namespace IGC

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1951,7 +1951,7 @@ void CustomSafeOptPass::visitTruncInst(TruncInst& I)
19511951
To:
19521952
%335 = call i16 @llvm.genx.GenISA.WaveShuffleIndex.i16(i16 %orig, i32 %333, i32 0)
19531953
*/
1954-
if( I.getSrcTy()->isIntegerTy( 32 ) && I.getDestTy()->isIntegerTy( 16 ) )
1954+
if( I.getSrcTy()->isIntegerTy( 32 ) && I.getDestTy()->isIntegerTy( 16 ) && IGC_IS_FLAG_DISABLED(EnableEmitMoreMoviCases))
19551955
{
19561956
// We know all variants of shuffle from zext are safe to demote. (unlike WaveAll which might not be)
19571957
if( auto* genIntr = dyn_cast<GenIntrinsicInst>( I.getOperand( 0 ) ); genIntr && isSubGroupShuffleVariant( genIntr ) && genIntr->hasOneUse() )

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,7 @@ DECLARE_IGC_REGKEY_ENUM(LscStoreCacheControlOverride, 0, "Overrides cache-contro
684684
DECLARE_IGC_REGKEY_ENUM(TgmLoadCacheControlOverride, 0, "Overrides cache-control options for non-intrinsic LSC tgm loads.", LSC_CACHE_CTRL_OPTIONS, true)
685685
DECLARE_IGC_REGKEY_ENUM(TgmStoreCacheControlOverride, 0, "Overrides cache-control options for non-intrinsic LSC tgm stores.", LSC_CACHE_CTRL_OPTIONS, true)
686686
DECLARE_IGC_REGKEY(bool, LscForceSpillNonStackcall, false, "Non-stack call kernels that spill will use LSC on DG2+", true)
687+
DECLARE_IGC_REGKEY(bool, EnableEmitMoreMoviCases, false, "Enables emitting movi for waveShuffle cases using And to keep index within single register.", true)
687688
DECLARE_IGC_REGKEY_ENUM(ForceRegisterAccessBoundsChecks, -1, \
688689
"Controls the behavior of RegisterAccessBoundsChecks, the pass that adds runtime bounds-checks for vector-indexing instructions." \
689690
"-1 - default behavior, the pass is enabled based on the API type or AILs" \

visa/BuildIR.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2423,9 +2423,12 @@ class IR_Builder {
24232423

24242424
void materializeGlobalImm(G4_BB *entryBB); // why is in FlowGraph.cpp???
24252425

2426+
bool canPromoteToMovi(G4_INST *);
2427+
24262428
#include "HWCaps.inc"
24272429

24282430
private:
2431+
G4_INST* getSingleDefInst(G4_INST* UI, Gen4_Operand_Number OpndNum) const;
24292432
G4_SrcRegRegion *createBindlessExDesc(uint32_t exdesc);
24302433
uint32_t createSamplerMsgDesc(VISASampler3DSubOpCode samplerOp,
24312434
bool isNativeSIMDSize, bool isFP16Return,

0 commit comments

Comments
 (0)