[llvm-branch-commits] [clang] [llvm] [AArch64][llvm] Armv9.7-A: Add support for new Advanced SIMD (Neon) instructions (PR #163165)

Wed Oct 22 11:02:26 PDT 2025

https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/163165

>From 5da2d2338bbf036ec7534856440dda1886cca5aa Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 15 Sep 2025 21:13:29 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Armv9.7-A: Add support for new Advanced
 SIMD (Neon) instructions

Add support for new Advanced SIMD (Neon) instructions:
 - FDOT (half-precision to single-precision, by element)
 - FDOT (half-precision to single-precision, vector)
 - FMMLA (half-precision, non-widening)
 - FMMLA (widening, half-precision to single-precision)

as documented here:

 * https://developer.arm.com/documentation/ddi0602/2025-09/
 * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions

Co-authored-by: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Co-authored-by: Caroline Concatto <caroline.concatto at arm.com>
Co-authored-by: Virginia Cangelosi <virginia.cangelosi at arm.com>
---
 .../print-supported-extensions-aarch64.c      |   2 +
 llvm/lib/Target/AArch64/AArch64Features.td    |   6 +
 .../lib/Target/AArch64/AArch64InstrFormats.td |  42 ++++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  17 +-
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |   2 +
 llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s  |   2 +-
 llvm/test/MC/AArch64/neon-fdot-diagnostics.s  |  59 +++++++
 llvm/test/MC/AArch64/neon-fdot.s              | 147 ++++++++++++++++++
 .../MC/AArch64/neon-fmmla-HtoS-diagnostics.s  |  24 +++
 llvm/test/MC/AArch64/neon-fmmla-HtoS.s        |  37 +++++
 llvm/test/MC/AArch64/neon-fmmla-diagnostics.s |  24 +++
 llvm/test/MC/AArch64/neon-fmmla.s             |  37 +++++
 .../TargetParser/TargetParserTest.cpp         |  32 +++-
 13 files changed, 424 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/neon-fdot-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/neon-fdot.s
 create mode 100644 llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/neon-fmmla-HtoS.s
 create mode 100644 llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/neon-fmmla.s

diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
 // CHECK-NEXT:     d128                FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
 // CHECK-NEXT:     dit                 FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
 // CHECK-NEXT:     dotprod             FEAT_DotProd                                           Enable dot product support
+// CHECK-NEXT:     f16f32dot           FEAT_F16F32DOT                                         Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT:     f16f32mm            FEAT_F16F32MM                                          Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
 // CHECK-NEXT:     f16mm               FEAT_F16MM                                             Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
 // CHECK-NEXT:     f32mm               FEAT_F32MM                                             Enable Matrix Multiply FP32 Extension
 // CHECK-NEXT:     f64mm               FEAT_F64MM                                             Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
 def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
   "Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
 
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+  "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+  "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
 //===----------------------------------------------------------------------===//
 //  Other Features
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6730c61552914..d0153c9b49903 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_15Operand;
 }
 
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+  let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+  let ParserMatchClass = Imm1_8Operand;
+}
+
+
 // timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
 def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
 }
 
 multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
-  def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+  def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
                                          V128, v8f16, v16i8, op>;
 }
 
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
                                          V128, v4f32, v16i8, op>;
 }
 
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+  def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+                                          V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+  def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+                                          V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+  def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+                                         v2f32, v4f16, OpNode>;
+  def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+                                         v4f32, v8f16, OpNode>;
+}
+
 // FP8 assembly/disassembly classes
 
 //----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
                                               V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
 }
 
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+  def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+                                           V64, v2f32, v4f16, VectorIndexS, null_frag>;
+  def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+                                            V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
 //----------------------------------------------------------------------------
 // FP8 Advanced SIMD vector x indexed element
 multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 56b3e84a4ac96..ab8cd055002f7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3       : Predicate<"Subtarget->hasSVE2p3()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
 def HasSME2p3       : Predicate<"Subtarget->hasSME2p3()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT    : Predicate<"Subtarget->hasF16F32DOT()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM     : Predicate<"Subtarget->hasF16F32MM()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
 
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
   def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
 }
 
+let Predicates = [HasF16F32DOT] in {
+  defm FDOT : SIMDThreeSameVectorFDot<"fdot">;
+  defm FDOTlane : SIMDThreeSameVectorFDOTIndex<"fdot">;
+} // Predicates = [HasF16F32DOT]
+
+let Predicates = [HasF16MM] in
+  defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+  defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
 let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+  defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
 
 //===----------------------------------------------------------------------===//
 // Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b9f8537f969b3..633a7a5eb87a4 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
     {"sve2p3", {AArch64::FeatureSVE2p3}},
     {"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
     {"f16mm", {AArch64::FeatureF16MM}},
+    {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+    {"f16f32mm", {AArch64::FeatureF16F32MM}},
 };
 
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
 // CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32dot 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN:        | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN:        | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v21.8h
+// CHECK-ENCODING: encoding: [0xb5,0xee,0x55,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e55eeb5 <unknown>
+
+fmmla v31.4s, v31.8h, v31.8h
+// CHECK-INST: fmmla v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xef,0x5f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e5fefff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
new file mode 100644
index 0000000000000..7fc5373fb30bb
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.8b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8b, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8b, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8s, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.8s, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8d, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.8d, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla.s b/llvm/test/MC/AArch64/neon-fmmla.s
new file mode 100644
index 0000000000000..f35c2fb9bfd91
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+f16mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-f16mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+f16mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.8h, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.8h, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0xc0,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ec0ec00 <unknown>
+
+fmmla v10.8h, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.8h, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0xca,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ecaed4a <unknown>
+
+fmmla v21.8h, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.8h, v21.8h, v21.8h
+// CHECK-ENCODING: encoding: [0xb5,0xee,0xd5,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4ed5eeb5 <unknown>
+
+fmmla v31.8h, v31.8h, v31.8h
+// CHECK-INST: fmmla v31.8h, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xef,0xdf,0x4e]
+// CHECK-ERROR: instruction requires: f16mm
+// CHECK-UNKNOWN: 4edfefff <unknown>
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index a259c1419a146..ef6aeae24c833 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1448,7 +1448,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_MPAMV2,       AArch64::AEK_MTETC,
       AArch64::AEK_GCIE,         AArch64::AEK_SME2P3,
       AArch64::AEK_SVE2P3,       AArch64::AEK_SVE_B16MM,
-      AArch64::AEK_F16MM,
+      AArch64::AEK_F16MM,        AArch64::AEK_F16F32DOT,
+      AArch64::AEK_F16F32MM,
   };
 
   std::vector<StringRef> Features;
@@ -1570,6 +1571,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2p3"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16mm"));
   EXPECT_TRUE(llvm::is_contained(Features, "+f16mm"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+f16f32dot"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+f16f32mm"));
 
   // Assuming we listed every extension above, this should produce the same
   // result.
@@ -1746,6 +1749,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sve2p3", "nosve2p3", "+sve2p3", "-sve2p3"},
       {"sve-b16mm", "nosve-b16mm", "+sve-b16mm", "-sve-b16mm"},
       {"f16mm", "nof16mm", "+f16mm", "-f16mm"},
+      {"f16f32dot", "nof16f32dot", "+f16f32dot", "-f16f32dot"},
+      {"f16f32mm", "nof16f32mm", "+f16f32mm", "-f16f32mm"},
   };
 
   for (unsigned i = 0; i < std::size(ArchExt); i++) {
@@ -1960,7 +1965,8 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}},
         {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}},
 
-        // simd -> {aes, sha2, sha3, sm4, f8f16mm, f8f32mm, faminmax, lut, fp8}
+        // simd -> {aes, sha2, sha3, sm4, f8f16mm, f8f32mm, faminmax, lut, fp8,
+        // f16f32dot, f16f32mm}
         {AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}},
         {AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}},
         {AArch64::ARMV8A, {"nosimd", "sha2"}, {"neon", "sha2"}, {}},
@@ -1979,6 +1985,10 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV9_6A, {"nosimd", "lut"}, {"neon", "lut"}, {}},
         {AArch64::ARMV9_6A, {"fp8", "nosimd"}, {}, {"neon", "fp8"}},
         {AArch64::ARMV9_6A, {"nosimd", "fp8"}, {"neon", "fp8"}, {}},
+        {AArch64::ARMV9_7A, {"nosimd", "f16f32mm"}, {"neon", "f16f32mm"}, {}},
+        {AArch64::ARMV9_7A, {"f16f32mm", "nosimd"}, {}, {"neon", "f16f32mm"}},
+        {AArch64::ARMV9_7A, {"nosimd", "f16f32dot"}, {"neon", "f16f32dot"}, {}},
+        {AArch64::ARMV9_7A, {"f16f32dot", "nosimd"}, {}, {"neon", "f16f32dot"}},
 
         // fp8 -> {fp8dot4, fp8dot2}
         {AArch64::ARMV9_6A, {"nofp8", "fp8dot4"}, {"fp8", "fp8dot4"}, {}},
@@ -1994,13 +2004,29 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"nosimd", "fcma"}, {"neon", "complxnum"}, {}},
         {AArch64::ARMV8A, {"fcma", "nosimd"}, {}, {"neon", "complxnum"}},
 
-        // fp16 -> {fp16fml, sve, f16mm}
+        // fp16 -> {fp16fml, sve, f16f32dot, f16f32mm, f16mm}
         {AArch64::ARMV8A, {"nofp16", "fp16fml"}, {"fullfp16", "fp16fml"}, {}},
         {AArch64::ARMV8A, {"fp16fml", "nofp16"}, {}, {"fullfp16", "fp16fml"}},
         {AArch64::ARMV8A, {"nofp16", "sve"}, {"fullfp16", "sve"}, {}},
         {AArch64::ARMV8A, {"sve", "nofp16"}, {}, {"fullfp16", "sve"}},
         {AArch64::ARMV9_7A, {"nofp16", "f16mm"}, {"fullfp16", "f16mm"}, {}},
         {AArch64::ARMV9_7A, {"f16mm", "nofp16"}, {}, {"fullfp16", "f16mm"}},
+        {AArch64::ARMV9_7A,
+         {"nofp16", "f16f32mm"},
+         {"fullfp16", "f16f32mm"},
+         {}},
+        {AArch64::ARMV9_7A,
+         {"f16f32mm", "nofp16"},
+         {},
+         {"fullfp16", "f16f32mm"}},
+        {AArch64::ARMV9_7A,
+         {"nofp16", "f16f32dot"},
+         {"fullfp16", "f16f32dot"},
+         {}},
+        {AArch64::ARMV9_7A,
+         {"f16f32dot", "nofp16"},
+         {},
+         {"fullfp16", "f16f32dot"}},
 
         // bf16 -> {sme}
         {AArch64::ARMV8A, {"nobf16", "sme"}, {"bf16", "sme"}, {}},

>From 307c8b5cbb1de5e0b232e0f74ab480c53571b0ba Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Thu, 16 Oct 2025 14:48:12 +0100
Subject: [PATCH 2/3] fixup! [AArch64][llvm] Armv9.7-A: Add support for new
 Advanced SIMD (Neon) instructions

Remove timm32_1_16 and timm32_1_8, as they're superfluous.
---
 llvm/lib/Target/AArch64/AArch64InstrFormats.td | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d0153c9b49903..425fe0f7d0221 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,21 +1166,6 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_15Operand;
 }
 
-// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
-def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
-  return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
-}]> {
-  let ParserMatchClass = Imm1_16Operand;
-}
-
-// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
-def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
- return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
-}]> {
-  let ParserMatchClass = Imm1_8Operand;
-}
-
-
 // timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
 def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 32;

>From e785cbcff4c615febdfaeeb1f61ba662d9add734 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Wed, 22 Oct 2025 13:49:37 +0100
Subject: [PATCH 3/3] fixup! [AArch64][llvm] Armv9.7-A: Add support for new
 Advanced SIMD (Neon) instructions

Rename some `def`s after CR comments
---
 llvm/lib/Target/AArch64/AArch64InstrFormats.td | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 425fe0f7d0221..588af257d11ff 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6538,7 +6538,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
 }
 
 multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-  def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+  def v8f16_v16i8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
                                          V128, v8f16, v16i8, op>;
 }
 
@@ -6548,19 +6548,19 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
 }
 
 multiclass SIMDThreeSameVectorFMLA<string asm> {
-  def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+  def v8f16_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
                                           V128, v8f16, v8f16, null_frag>;
 }
 
 multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
-  def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+  def v4f32_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
                                           V128, v4f32, v8f16, null_frag>;
 }
 
 multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
-  def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+  def v2f32_v4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
                                          v2f32, v4f16, OpNode>;
-  def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+  def v4f32_v8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
                                          v4f32, v8f16, OpNode>;
 }
 
@@ -9186,9 +9186,9 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
 }
 
 multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
-  def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+  def v4f16_v2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
                                            V64, v2f32, v4f16, VectorIndexS, null_frag>;
-  def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+  def v8f16_v4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
                                             V128, v4f32, v8f16, VectorIndexS, null_frag>;
 }