Skip to content

Commit 7dd8a48

Browse files
committed
ARM64-SVE: gathervector extends
1 parent 3e8b786 commit 7dd8a48

File tree

9 files changed

+1485
-47
lines changed

9 files changed

+1485
-47
lines changed

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,19 +1601,36 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
16011601
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, op3, intrinsic, simdBaseJitType,
16021602
simdSize);
16031603

1604-
#ifdef TARGET_XARCH
1605-
if ((intrinsic == NI_AVX2_GatherVector128) || (intrinsic == NI_AVX2_GatherVector256))
1604+
switch(intrinsic)
16061605
{
1607-
assert(varTypeIsSIMD(op2->TypeGet()));
1608-
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
1609-
}
1606+
#if defined(TARGET_XARCH)
1607+
case NI_AVX2_GatherVector128:
1608+
case NI_AVX2_GatherVector256:
1609+
assert(varTypeIsSIMD(op2->TypeGet()));
1610+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
1611+
break;
1612+
16101613
#elif defined(TARGET_ARM64)
1611-
if (intrinsic == NI_Sve_GatherVector)
1612-
{
1613-
assert(varTypeIsSIMD(op3->TypeGet()));
1614-
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
1615-
}
1614+
case NI_Sve_GatherVector:
1615+
case NI_Sve_GatherVectorByteZeroExtend:
1616+
case NI_Sve_GatherVectorInt16SignExtend:
1617+
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
1618+
case NI_Sve_GatherVectorInt32SignExtend:
1619+
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
1620+
case NI_Sve_GatherVectorSByteSignExtend:
1621+
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
1622+
case NI_Sve_GatherVectorUInt16ZeroExtend:
1623+
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
1624+
case NI_Sve_GatherVectorUInt32ZeroExtend:
1625+
assert(varTypeIsSIMD(op3->TypeGet()));
1626+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op3ClsHnd));
1627+
break;
16161628
#endif
1629+
1630+
default:
1631+
break;
1632+
}
1633+
16171634
break;
16181635
}
16191636

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,33 +1846,44 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
18461846
}
18471847

18481848
case NI_Sve_GatherVector:
1849+
case NI_Sve_GatherVectorByteZeroExtend:
1850+
case NI_Sve_GatherVectorInt16SignExtend:
1851+
case NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend:
1852+
case NI_Sve_GatherVectorInt32SignExtend:
1853+
case NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend:
1854+
case NI_Sve_GatherVectorSByteSignExtend:
1855+
case NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend:
1856+
case NI_Sve_GatherVectorUInt16ZeroExtend:
1857+
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
1858+
case NI_Sve_GatherVectorUInt32ZeroExtend:
18491859
{
18501860
if (!varTypeIsSIMD(intrin.op2->gtType))
18511861
{
1852-
// GatherVector(Vector<T> mask, T* address, Vector<T2> indices)
1862+
// GatherVector...(Vector<T> mask, T* address, Vector<T2> indices)
18531863

18541864
assert(intrin.numOperands == 3);
18551865
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
18561866

18571867
if (baseSize == EA_8BYTE)
18581868
{
1859-
// Index is multiplied by 8
1860-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt,
1861-
INS_SCALABLE_OPTS_LSL_N);
1869+
// Index is multiplied.
1870+
insScalableOpts sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
1871+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, sopt);
18621872
}
18631873
else
18641874
{
1865-
// Index is sign or zero extended to 64bits, then multiplied by 4
1875+
// Index is sign or zero extended to 64bits, then multiplied.
18661876
assert(baseSize == EA_4BYTE);
18671877
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
18681878
: INS_OPTS_SCALABLE_S_SXTW;
1869-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt,
1870-
INS_SCALABLE_OPTS_MOD_N);
1879+
1880+
insScalableOpts sopt = (ins == INS_sve_ld1b || ins == INS_sve_ld1sb) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_MOD_N;
1881+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, sopt);
18711882
}
18721883
}
18731884
else
18741885
{
1875-
// GatherVector(Vector<T> mask, Vector<T2> addresses)
1886+
// GatherVector...(Vector<T> mask, Vector<T2> addresses)
18761887

18771888
assert(intrin.numOperands == 2);
18781889
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ HARDWARE_INTRINSIC(Sve, FusedMultiplySubtract,
7676
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmls, INS_sve_fmls}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
7777
HARDWARE_INTRINSIC(Sve, FusedMultiplySubtractNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmls, INS_sve_fnmls}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
7878
HARDWARE_INTRINSIC(Sve, GatherVector, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
// Sve gather-load-and-extend entries added next to the plain GatherVector entry.
// Every row below uses the same SIMD size / argument-count fields (-1, -1, false), the same
// category (HW_Category_SIMD) and the same flag set as GatherVector:
//   HW_Flag_Scalable | HW_Flag_SpecialCodeGen | HW_Flag_ExplicitMaskedOperation | HW_Flag_LowMaskedOperation
// Only the instruction table differs between rows:
//   GatherVectorByteZeroExtend                    -> INS_sve_ld1b   (slots 4-7)
//   GatherVectorSByteSignExtend                   -> INS_sve_ld1sb  (slots 4-7)
//   GatherVectorUInt16[WithByteOffsets]ZeroExtend -> INS_sve_ld1h   (slots 4-7)
//   GatherVectorInt16[WithByteOffsets]SignExtend  -> INS_sve_ld1sh  (slots 4-7)
//   GatherVectorUInt32[WithByteOffsets]ZeroExtend -> INS_sve_ld1w   (slots 4-7)
//   GatherVectorInt32[WithByteOffsets]SignExtend  -> INS_sve_ld1sw  (slots 6-7 only)
// All other slots are INS_invalid (slots counted from 0).
// NOTE(review): the slots are presumably indexed by the intrinsic's base type, with 4-7 being
//               int/uint/long/ulong - confirm against the table header, which is outside this view.
// NOTE(review): each ...WithByteOffsets... row is identical to its element-indexed counterpart, so
//               this table does not distinguish byte offsets from scaled indices; presumably that
//               distinction has to be made in codegen - confirm.
79+
HARDWARE_INTRINSIC(Sve, GatherVectorByteZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
80+
HARDWARE_INTRINSIC(Sve, GatherVectorInt16SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
81+
HARDWARE_INTRINSIC(Sve, GatherVectorInt16WithByteOffsetsSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
82+
HARDWARE_INTRINSIC(Sve, GatherVectorInt32SignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
83+
HARDWARE_INTRINSIC(Sve, GatherVectorInt32WithByteOffsetsSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
84+
HARDWARE_INTRINSIC(Sve, GatherVectorSByteSignExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
85+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt16WithByteOffsetsZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
86+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt16ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
87+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32WithByteOffsetsZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
88+
HARDWARE_INTRINSIC(Sve, GatherVectorUInt32ZeroExtend, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
7989
HARDWARE_INTRINSIC(Sve, GetActiveElementCount, -1, 2, true, {INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp, INS_sve_cntp}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation)
8090
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
8191
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)

0 commit comments

Comments
 (0)