Skip to content

Commit 6477b41

Browse files
authored
[llvm][AArch64][Assembly]: Add FP8FMA assembly and disassembly. (#70134)
This patch adds the feature flags FP8FMA and SSVE_FP8FMA and the assembly/disassembly for the following NEON and SVE2 instructions:
* NEON: FMLALBlane, FMLALTlane, FMLALLBBlane, FMLALLBTlane, FMLALLTBlane, FMLALLTTlane, FMLALB, FMLALT, FMLALLBB, FMLALLBT, FMLALLTB, FMLALLTT
* SVE2: FMLALB_ZZZI, FMLALT_ZZZI, FMLALB_ZZZ, FMLALT_ZZZ, FMLALLBB_ZZZI, FMLALLBT_ZZZI, FMLALLTB_ZZZI, FMLALLTT_ZZZI, FMLALLBB_ZZZ, FMLALLBT_ZZZ, FMLALLTB_ZZZ, FMLALLTT_ZZZ
This follows the Arm A64 ISA documentation: https://developer.arm.com/documentation/ddi0602/2023-09
1 parent 47d9fbc commit 6477b41

22 files changed

+944
-53
lines changed

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ enum ArchExtKind : unsigned {
162162
AEK_FPMR = 58, // FEAT_FPMR
163163
AEK_FP8 = 59, // FEAT_FP8
164164
AEK_FAMINMAX = 60, // FEAT_FAMINMAX
165+
AEK_FP8FMA = 61, // FEAT_FP8FMA
166+
AEK_SSVE_FP8FMA = 62, // FEAT_SSVE_FP8FMA
165167
AEK_NUM_EXTENSIONS
166168
};
167169
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -273,6 +275,8 @@ inline constexpr ExtensionInfo Extensions[] = {
273275
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
274276
{"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0},
275277
{"faminmax", AArch64::AEK_FAMINMAX, "+faminmax", "-faminmax", FEAT_INIT, "", 0},
278+
{"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fpmr", 0},
279+
{"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2", 0},
276280
// Special cases
277281
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
278282
};

llvm/include/llvm/TargetParser/SubtargetFeature.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace llvm {
3131
class raw_ostream;
3232
class Triple;
3333

34-
const unsigned MAX_SUBTARGET_WORDS = 4;
34+
const unsigned MAX_SUBTARGET_WORDS = 5;
3535
const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
3636

3737
/// Container class for subtarget features.

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,12 @@ def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
517517
def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
518518
"Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
519519

520+
def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
521+
"Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
522+
523+
def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
524+
"Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
525+
520526
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
521527
"Apple A7 (the CPU formerly known as Cyclone)">;
522528

@@ -747,7 +753,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
747753
def SVE2p1Unsupported : AArch64Unsupported;
748754

749755
def SVE2Unsupported : AArch64Unsupported {
750-
let F = !listconcat([HasSVE2, HasSVE2orSME,
756+
let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA,
751757
HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
752758
SVE2p1Unsupported.F);
753759
}
@@ -761,7 +767,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
761767
def SME2p1Unsupported : AArch64Unsupported;
762768

763769
def SME2Unsupported : AArch64Unsupported {
764-
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
770+
let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA],
765771
SME2p1Unsupported.F);
766772
}
767773

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6055,6 +6055,15 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
60556055
v4f32, v8f16, OpNode>;
60566056
}
60576057

6058+
multiclass SIMDThreeSameVectorMLA<bit Q, string asm>{
6059+
def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
6060+
V128, v8f16, v16i8, null_frag>;
6061+
}
6062+
6063+
multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm>{
6064+
def v4f32 : BaseSIMDThreeSameVectorDot<Q, 0b0, sz, 0b1000, asm, ".4s", ".16b",
6065+
V128, v4f32, v16i8, null_frag>;
6066+
}
60586067

60596068
// FP8 assembly/disassembly classes
60606069

@@ -8521,6 +8530,31 @@ class BF16ToSinglePrecision<string asm>
85218530
}
85228531
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
85238532

8533+
//----------------------------------------------------------------------------
8534+
class BaseSIMDThreeSameVectorIndexB<bit Q, bit U, bits<2> sz, bits<4> opc,
8535+
string asm, string dst_kind,
8536+
RegisterOperand RegType,
8537+
RegisterOperand RegType_lo>
8538+
: BaseSIMDIndexedTied<Q, U, 0b0, sz, opc,
8539+
RegType, RegType, RegType_lo, VectorIndexB,
8540+
asm, "", dst_kind, ".16b", ".b", []> {
8541+
8542+
// idx = H:L:M
8543+
bits<4> idx;
8544+
let Inst{11} = idx{3};
8545+
let Inst{21-19} = idx{2-0};
8546+
}
8547+
8548+
multiclass SIMDThreeSameVectorMLAIndex<bit Q, string asm> {
8549+
def v8f16 : BaseSIMDThreeSameVectorIndexB<Q, 0b0, 0b11, 0b0000, asm, ".8h",
8550+
V128, V128_0to7>;
8551+
}
8552+
8553+
multiclass SIMDThreeSameVectorMLALIndex<bit Q, bits<2> sz, string asm> {
8554+
def v4f32 : BaseSIMDThreeSameVectorIndexB<Q, 0b1, sz, 0b1000, asm, ".4s",
8555+
V128, V128_0to7>;
8556+
}
8557+
85248558
//----------------------------------------------------------------------------
85258559
// Armv8.6 Matrix Multiply Extension
85268560
//----------------------------------------------------------------------------

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,13 @@ def HasFP8 : Predicate<"Subtarget->hasFP8()">,
166166
AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
167167
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
168168
AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
169+
def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
170+
AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
171+
// Satisfied by ssve-fp8fma alone, or by sve2 together with fp8fma. The C++
// predicate string mirrors the assembler predicate and uses the same
// has<Feature>() accessor form as the neighbouring predicates.
def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
172+
"(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
173+
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
174+
(all_of FeatureSVE2, FeatureFP8FMA)),
175+
"ssve-fp8fma or (sve2 and fp8fma)">;
169176

170177
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
171178
// they should be enabled if either has been specified.
@@ -9286,6 +9293,21 @@ let Predicates = [HasFAMINMAX] in {
92869293
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
92879294
} // End let Predicates = [HasFAMINMAX]
92889295

9296+
let Predicates = [HasFP8FMA] in {
9297+
defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
9298+
defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
9299+
defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
9300+
defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
9301+
defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
9302+
defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
9303+
9304+
defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
9305+
defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
9306+
defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
9307+
defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
9308+
defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
9309+
defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
9310+
} // End let Predicates = [HasFP8FMA]
92899311

92909312
include "AArch64InstrAtomics.td"
92919313
include "AArch64SVEInstrInfo.td"

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,8 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
981981
case AArch64::FPR64_loRegClassID:
982982
case AArch64::FPR16_loRegClassID:
983983
return 16;
984+
case AArch64::FPR128_0to7RegClassID:
985+
return 8;
984986
}
985987
}
986988

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,13 @@ def FPR128_lo : RegisterClass<"AArch64",
467467
v8bf16],
468468
128, (trunc FPR128, 16)>;
469469

470+
// The lower 8 vector registers. Some instructions can only take registers
471+
// in this range.
472+
def FPR128_0to7 : RegisterClass<"AArch64",
473+
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
474+
v8bf16],
475+
128, (trunc FPR128, 8)>;
476+
470477
// Pairs, triples, and quads of 64-bit vector registers.
471478
def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
472479
def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
@@ -534,6 +541,15 @@ def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
534541
let ParserMatchClass = VectorRegLoAsmOperand;
535542
}
536543

544+
def VectorReg0to7AsmOperand : AsmOperandClass {
545+
let Name = "VectorReg0to7";
546+
let PredicateMethod = "isNeonVectorReg0to7";
547+
}
548+
549+
def V128_0to7 : RegisterOperand<FPR128_0to7, "printVRegOperand"> {
550+
let ParserMatchClass = VectorReg0to7AsmOperand;
551+
}
552+
537553
class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize>
538554
: AsmOperandClass {
539555
let Name = "TypedVectorList" # count # "_" # lanes # eltsize;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4045,3 +4045,22 @@ let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
40454045
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
40464046
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
40474047
} // End HasSVE2orSME2, HasFAMINMAX
4048+
4049+
let Predicates = [HasSSVE_FP8FMA] in {
4050+
// FP8 Widening Multiply-Add Long - Indexed Group
4051+
def FMLALB_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b0, "fmlalb">;
4052+
def FMLALT_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b1, "fmlalt">;
4053+
// FP8 Widening Multiply-Add Long Group
4054+
def FMLALB_ZZZ : sve2_fp8_mla<0b100, ZPR16, "fmlalb">;
4055+
def FMLALT_ZZZ : sve2_fp8_mla<0b101, ZPR16, "fmlalt">;
4056+
// FP8 Widening Multiply-Add Long Long - Indexed Group
4057+
def FMLALLBB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b00, "fmlallbb">;
4058+
def FMLALLBT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b01, "fmlallbt">;
4059+
def FMLALLTB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b10, "fmlalltb">;
4060+
def FMLALLTT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b11, "fmlalltt">;
4061+
// FP8 Widening Multiply-Add Long Long Group
4062+
def FMLALLBB_ZZZ : sve2_fp8_mla<0b000, ZPR32, "fmlallbb">;
4063+
def FMLALLBT_ZZZ : sve2_fp8_mla<0b001, ZPR32, "fmlallbt">;
4064+
def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
4065+
def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
4066+
} // End HasSSVE_FP8FMA

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures =
2424
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
2525
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
26-
HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
26+
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA];
2727

2828
let FullInstRWOverlapCheck = 0;
2929
}

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,12 @@ class AArch64Operand : public MCParsedAsmOperand {
12231223
Reg.RegNum));
12241224
}
12251225

1226+
bool isNeonVectorReg0to7() const {
1227+
return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
1228+
(AArch64MCRegisterClasses[AArch64::FPR128_0to7RegClassID].contains(
1229+
Reg.RegNum));
1230+
}
1231+
12261232
bool isMatrix() const { return Kind == k_MatrixRegister; }
12271233
bool isMatrixTileList() const { return Kind == k_MatrixTileList; }
12281234

@@ -1766,6 +1772,11 @@ class AArch64Operand : public MCParsedAsmOperand {
17661772
Inst.addOperand(MCOperand::createReg(getReg()));
17671773
}
17681774

1775+
void addVectorReg0to7Operands(MCInst &Inst, unsigned N) const {
1776+
assert(N == 1 && "Invalid number of operands!");
1777+
Inst.addOperand(MCOperand::createReg(getReg()));
1778+
}
1779+
17691780
enum VecListIndexType {
17701781
VecListIdx_DReg = 0,
17711782
VecListIdx_QReg = 1,
@@ -2598,31 +2609,31 @@ static std::optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
25982609

25992610
switch (VectorKind) {
26002611
case RegKind::NeonVector:
2601-
Res =
2602-
StringSwitch<std::pair<int, int>>(Suffix.lower())
2603-
.Case("", {0, 0})
2604-
.Case(".1d", {1, 64})
2605-
.Case(".1q", {1, 128})
2606-
// '.2h' needed for fp16 scalar pairwise reductions
2607-
.Case(".2h", {2, 16})
2608-
.Case(".2s", {2, 32})
2609-
.Case(".2d", {2, 64})
2610-
// '.4b' is another special case for the ARMv8.2a dot product
2611-
// operand
2612-
.Case(".4b", {4, 8})
2613-
.Case(".4h", {4, 16})
2614-
.Case(".4s", {4, 32})
2615-
.Case(".8b", {8, 8})
2616-
.Case(".8h", {8, 16})
2617-
.Case(".16b", {16, 8})
2618-
// Accept the width neutral ones, too, for verbose syntax. If those
2619-
// aren't used in the right places, the token operand won't match so
2620-
// all will work out.
2621-
.Case(".b", {0, 8})
2622-
.Case(".h", {0, 16})
2623-
.Case(".s", {0, 32})
2624-
.Case(".d", {0, 64})
2625-
.Default({-1, -1});
2612+
Res = StringSwitch<std::pair<int, int>>(Suffix.lower())
2613+
.Case("", {0, 0})
2614+
.Case(".1d", {1, 64})
2615+
.Case(".1q", {1, 128})
2616+
// '.2h' needed for fp16 scalar pairwise reductions
2617+
.Case(".2h", {2, 16})
2618+
.Case(".2b", {2, 8})
2619+
.Case(".2s", {2, 32})
2620+
.Case(".2d", {2, 64})
2621+
// '.4b' is another special case for the ARMv8.2a dot product
2622+
// operand
2623+
.Case(".4b", {4, 8})
2624+
.Case(".4h", {4, 16})
2625+
.Case(".4s", {4, 32})
2626+
.Case(".8b", {8, 8})
2627+
.Case(".8h", {8, 16})
2628+
.Case(".16b", {16, 8})
2629+
// Accept the width neutral ones, too, for verbose syntax. If
2630+
// those aren't used in the right places, the token operand won't
2631+
// match so all will work out.
2632+
.Case(".b", {0, 8})
2633+
.Case(".h", {0, 16})
2634+
.Case(".s", {0, 32})
2635+
.Case(".d", {0, 64})
2636+
.Default({-1, -1});
26262637
break;
26272638
case RegKind::SVEPredicateAsCounter:
26282639
case RegKind::SVEPredicateVector:
@@ -3641,6 +3652,8 @@ static const struct Extension {
36413652
{"fpmr", {AArch64::FeatureFPMR}},
36423653
{"fp8", {AArch64::FeatureFP8}},
36433654
{"faminmax", {AArch64::FeatureFAMINMAX}},
3655+
{"fp8fma", {AArch64::FeatureFP8FMA}},
3656+
{"ssve-fp8fma", {AArch64::FeatureSSVE_FP8FMA}},
36443657
};
36453658

36463659
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {

llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
4444
static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
4545
uint64_t Address,
4646
const MCDisassembler *Decoder);
47+
static DecodeStatus
48+
DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
49+
const MCDisassembler *Decoder);
4750
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
4851
uint64_t Address,
4952
const MCDisassembler *Decoder);
@@ -437,6 +440,14 @@ DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
437440
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
438441
}
439442

443+
static DecodeStatus
444+
DecodeFPR128_0to7RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
445+
const MCDisassembler *Decoder) {
446+
if (RegNo > 7)
447+
return Fail;
448+
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
449+
}
450+
440451
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
441452
uint64_t Addr,
442453
const MCDisassembler *Decoder) {

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,10 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
245245
case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
246246
case AArch64::FPR32RegClassID:
247247
case AArch64::FPR64RegClassID:
248-
case AArch64::FPR64_loRegClassID:
249248
case AArch64::FPR128RegClassID:
249+
case AArch64::FPR64_loRegClassID:
250250
case AArch64::FPR128_loRegClassID:
251+
case AArch64::FPR128_0to7RegClassID:
251252
case AArch64::DDRegClassID:
252253
case AArch64::DDDRegClassID:
253254
case AArch64::DDDDRegClassID:

0 commit comments

Comments
 (0)