[llvm-branch-commits] [clang] [llvm] [AArch64][llvm] Armv9.7-A: Add support for new Advanced SIMD (Neon) instructions (PR #163165)
Jonathan Thackray via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Oct 22 11:02:26 PDT 2025
https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/163165
>From 5da2d2338bbf036ec7534856440dda1886cca5aa Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 15 Sep 2025 21:13:29 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Armv9.7-A: Add support for new Advanced
SIMD (Neon) instructions
Add support for new Advanced SIMD (Neon) instructions:
- FDOT (half-precision to single-precision, by element)
- FDOT (half-precision to single-precision, vector)
- FMMLA (half-precision, non-widening)
- FMMLA (widening, half-precision to single-precision)
as documented here:
* https://developer.arm.com/documentation/ddi0602/2025-09/
* https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions
Co-authored-by: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Co-authored-by: Caroline Concatto <caroline.concatto at arm.com>
Co-authored-by: Virginia Cangelosi <virginia.cangelosi at arm.com>
---
.../print-supported-extensions-aarch64.c | 2 +
llvm/lib/Target/AArch64/AArch64Features.td | 6 +
.../lib/Target/AArch64/AArch64InstrFormats.td | 42 ++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 +-
.../AArch64/AsmParser/AArch64AsmParser.cpp | 2 +
llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s | 2 +-
llvm/test/MC/AArch64/neon-fdot-diagnostics.s | 59 +++++++
llvm/test/MC/AArch64/neon-fdot.s | 147 ++++++++++++++++++
.../MC/AArch64/neon-fmmla-HtoS-diagnostics.s | 24 +++
llvm/test/MC/AArch64/neon-fmmla-HtoS.s | 37 +++++
llvm/test/MC/AArch64/neon-fmmla-diagnostics.s | 24 +++
llvm/test/MC/AArch64/neon-fmmla.s | 37 +++++
.../TargetParser/TargetParserTest.cpp | 32 +++-
13 files changed, 424 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/MC/AArch64/neon-fdot-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/neon-fdot.s
create mode 100644 llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/neon-fmmla-HtoS.s
create mode 100644 llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
create mode 100644 llvm/test/MC/AArch64/neon-fmmla.s
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
+// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6730c61552914..d0153c9b49903 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+ let ParserMatchClass = Imm1_8Operand;
+}
+
+
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 56b3e84a4ac96..ab8cd055002f7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b9f8537f969b3..633a7a5eb87a4 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=f16f32dot 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v21.8h
+// CHECK-ENCODING: encoding: [0xb5,0xee,0x55,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e55eeb5 <unknown>
+
+fmmla v31.4s, v31.8h, v31.8h
+// CHECK-INST: fmmla v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xef,0x5f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e5fefff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
new file mode 100644
index 0000000000000..7fc5373fb30bb
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.8b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8b, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8b, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8s, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.8s, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8d, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.8d, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla.s b/llvm/test/MC/AArch64/neon-fmmla.s
new file mode 100644
index 0000000000000..f35c2fb9bfd91
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.8h, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.8h, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0xc0,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ec0ec00 <unknown>
+
+fmmla v10.8h, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.8h, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0xca,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ecaed4a <unknown>
+
+fmmla v21.8h, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.8h, v21.8h, v21.8h
+// CHECK-ENCODING: encoding: [0xb5,0xee,0xd5,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ed5eeb5 <unknown>
+
+fmmla v31.8h, v31.8h, v31.8h
+// CHECK-INST: fmmla v31.8h, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xef,0xdf,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4edfefff <unknown>
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index a259c1419a146..ef6aeae24c833 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1448,7 +1448,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
AArch64::AEK_MPAMV2, AArch64::AEK_MTETC,
AArch64::AEK_GCIE, AArch64::AEK_SME2P3,
AArch64::AEK_SVE2P3, AArch64::AEK_SVE_B16MM,
- AArch64::AEK_F16MM,
+ AArch64::AEK_F16MM, AArch64::AEK_F16F32DOT,
+ AArch64::AEK_F16F32MM,
};
std::vector<StringRef> Features;
@@ -1570,6 +1571,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+sve2p3"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16mm"));
EXPECT_TRUE(llvm::is_contained(Features, "+f16mm"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+f16f32dot"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+f16f32mm"));
// Assuming we listed every extension above, this should produce the same
// result.
@@ -1746,6 +1749,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"sve2p3", "nosve2p3", "+sve2p3", "-sve2p3"},
{"sve-b16mm", "nosve-b16mm", "+sve-b16mm", "-sve-b16mm"},
{"f16mm", "nof16mm", "+f16mm", "-f16mm"},
+ {"f16f32dot", "nof16f32dot", "+f16f32dot", "-f16f32dot"},
+ {"f16f32mm", "nof16f32mm", "+f16f32mm", "-f16f32mm"},
};
for (unsigned i = 0; i < std::size(ArchExt); i++) {
@@ -1960,7 +1965,8 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}},
{AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}},
- // simd -> {aes, sha2, sha3, sm4, f8f16mm, f8f32mm, faminmax, lut, fp8}
+ // simd -> {aes, sha2, sha3, sm4, f8f16mm, f8f32mm, faminmax, lut, fp8,
+ // f16f32dot, f16f32mm}
{AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}},
{AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}},
{AArch64::ARMV8A, {"nosimd", "sha2"}, {"neon", "sha2"}, {}},
@@ -1979,6 +1985,10 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV9_6A, {"nosimd", "lut"}, {"neon", "lut"}, {}},
{AArch64::ARMV9_6A, {"fp8", "nosimd"}, {}, {"neon", "fp8"}},
{AArch64::ARMV9_6A, {"nosimd", "fp8"}, {"neon", "fp8"}, {}},
+ {AArch64::ARMV9_7A, {"nosimd", "f16f32mm"}, {"neon", "f16f32mm"}, {}},
+ {AArch64::ARMV9_7A, {"f16f32mm", "nosimd"}, {}, {"neon", "f16f32mm"}},
+ {AArch64::ARMV9_7A, {"nosimd", "f16f32dot"}, {"neon", "f16f32dot"}, {}},
+ {AArch64::ARMV9_7A, {"f16f32dot", "nosimd"}, {}, {"neon", "f16f32dot"}},
// fp8 -> {fp8dot4, fp8dot2}
{AArch64::ARMV9_6A, {"nofp8", "fp8dot4"}, {"fp8", "fp8dot4"}, {}},
@@ -1994,13 +2004,29 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nosimd", "fcma"}, {"neon", "complxnum"}, {}},
{AArch64::ARMV8A, {"fcma", "nosimd"}, {}, {"neon", "complxnum"}},
- // fp16 -> {fp16fml, sve, f16mm}
+ // fp16 -> {fp16fml, sve, f16f32dot, f16f32mm, f16mm}
{AArch64::ARMV8A, {"nofp16", "fp16fml"}, {"fullfp16", "fp16fml"}, {}},
{AArch64::ARMV8A, {"fp16fml", "nofp16"}, {}, {"fullfp16", "fp16fml"}},
{AArch64::ARMV8A, {"nofp16", "sve"}, {"fullfp16", "sve"}, {}},
{AArch64::ARMV8A, {"sve", "nofp16"}, {}, {"fullfp16", "sve"}},
{AArch64::ARMV9_7A, {"nofp16", "f16mm"}, {"fullfp16", "f16mm"}, {}},
{AArch64::ARMV9_7A, {"f16mm", "nofp16"}, {}, {"fullfp16", "f16mm"}},
+ {AArch64::ARMV9_7A,
+ {"nofp16", "f16f32mm"},
+ {"fullfp16", "f16f32mm"},
+ {}},
+ {AArch64::ARMV9_7A,
+ {"f16f32mm", "nofp16"},
+ {},
+ {"fullfp16", "f16f32mm"}},
+ {AArch64::ARMV9_7A,
+ {"nofp16", "f16f32dot"},
+ {"fullfp16", "f16f32dot"},
+ {}},
+ {AArch64::ARMV9_7A,
+ {"f16f32dot", "nofp16"},
+ {},
+ {"fullfp16", "f16f32dot"}},
// bf16 -> {sme}
{AArch64::ARMV8A, {"nobf16", "sme"}, {"bf16", "sme"}, {}},
>From 307c8b5cbb1de5e0b232e0f74ab480c53571b0ba Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Thu, 16 Oct 2025 14:48:12 +0100
Subject: [PATCH 2/3] fixup! [AArch64][llvm] Armv9.7-A: Add support for new
Advanced SIMD (Neon) instructions
Remove timm32_1_16 and timm32_1_8, as they're superfluous.
---
llvm/lib/Target/AArch64/AArch64InstrFormats.td | 15 ---------------
1 file changed, 15 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d0153c9b49903..425fe0f7d0221 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,21 +1166,6 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
-// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
-def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
- return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
-}]> {
- let ParserMatchClass = Imm1_16Operand;
-}
-
-// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
-def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
- return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
-}]> {
- let ParserMatchClass = Imm1_8Operand;
-}
-
-
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
>From e785cbcff4c615febdfaeeb1f61ba662d9add734 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Wed, 22 Oct 2025 13:49:37 +0100
Subject: [PATCH 3/3] fixup! [AArch64][llvm] Armv9.7-A: Add support for new
Advanced SIMD (Neon) instructions
Rename some `def`s after CR comments
---
llvm/lib/Target/AArch64/AArch64InstrFormats.td | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 425fe0f7d0221..588af257d11ff 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6538,7 +6538,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
- def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v8f16_v16i8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,19 +6548,19 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
}
multiclass SIMDThreeSameVectorFMLA<string asm> {
- def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ def v8f16_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
V128, v8f16, v8f16, null_frag>;
}
multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
- def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ def v4f32_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
V128, v4f32, v8f16, null_frag>;
}
multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
- def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ def v2f32_v4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
v2f32, v4f16, OpNode>;
- def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ def v4f32_v8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
v4f32, v8f16, OpNode>;
}
@@ -9186,9 +9186,9 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
}
multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
- def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ def v4f16_v2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
V64, v2f32, v4f16, VectorIndexS, null_frag>;
- def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
V128, v4f32, v8f16, VectorIndexS, null_frag>;
}
More information about the llvm-branch-commits
mailing list