[llvm] a5f0525 - [AArch64][SelectionDAG] Enable new partial reduction lowering by default (#143565)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 08:47:59 PDT 2025
Author: Nicholas Guy
Date: 2025-06-16T16:47:55+01:00
New Revision: a5f0525d4b3edba50706cb0e4b9a48f0691e2b4c
URL: https://github.com/llvm/llvm-project/commit/a5f0525d4b3edba50706cb0e4b9a48f0691e2b4c
DIFF: https://github.com/llvm/llvm-project/commit/a5f0525d4b3edba50706cb0e4b9a48f0691e2b4c.diff
LOG: [AArch64][SelectionDAG] Enable new partial reduction lowering by default (#143565)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7519ac5260a64..c86aed7b38c8c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -153,13 +153,6 @@ cl::opt<bool> EnableSVEGISel(
cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
cl::init(false));
-// FIXME : This is a temporary flag, and is used to help transition to
-// performing lowering the proper way using the new PARTIAL_REDUCE_MLA ISD
-// nodes.
-static cl::opt<bool> EnablePartialReduceNodes(
- "aarch64-enable-partial-reduce-nodes", cl::init(false), cl::ReallyHidden,
- cl::desc("Use the new method of lowering partial reductions."));
-
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
@@ -1457,7 +1450,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
- if (EnablePartialReduceNodes && Subtarget->hasDotProd()) {
+ if (Subtarget->hasDotProd()) {
static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
ISD::PARTIAL_REDUCE_UMLA};
@@ -1895,7 +1888,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// Handle partial reduction operations
- if (EnablePartialReduceNodes && Subtarget->isSVEorStreamingSVEAvailable()) {
+ if (Subtarget->isSVEorStreamingSVEAvailable()) {
// Mark known legal pairs as 'Legal' (these will expand to UDOT or SDOT).
// Other pairs will default to 'Expand'.
static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
@@ -1957,17 +1950,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv2i64,
Custom);
- if (EnablePartialReduceNodes) {
- static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
- ISD::PARTIAL_REDUCE_UMLA};
- // Must be lowered to SVE instructions.
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
- }
+ static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+ ISD::PARTIAL_REDUCE_UMLA};
+ // Must be lowered to SVE instructions.
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
}
}
@@ -2165,16 +2156,6 @@ bool AArch64TargetLowering::shouldExpandPartialReductionIntrinsic(
assert(I->getIntrinsicID() ==
Intrinsic::experimental_vector_partial_reduce_add &&
"Unexpected intrinsic!");
- if (EnablePartialReduceNodes)
- return true;
-
- EVT VT = EVT::getEVT(I->getType());
- auto Op1 = I->getOperand(1);
- EVT Op1VT = EVT::getEVT(Op1->getType());
- if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
- (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount() ||
- VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()))
- return false;
return true;
}
@@ -2252,37 +2233,32 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
bool PreferNEON = VT.is64BitVector() || VT.is128BitVector();
bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
- if (EnablePartialReduceNodes) {
- static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
- ISD::PARTIAL_REDUCE_UMLA};
- unsigned NumElts = VT.getVectorNumElements();
- if (VT.getVectorElementType() == MVT::i64) {
- setPartialReduceMLAAction(MLAOps, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
- } else if (VT.getVectorElementType() == MVT::i32) {
- setPartialReduceMLAAction(MLAOps, VT,
+ static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+ ISD::PARTIAL_REDUCE_UMLA};
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.getVectorElementType() == MVT::i64) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
+ } else if (VT.getVectorElementType() == MVT::i32) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
+ } else if (VT.getVectorElementType() == MVT::i16) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
+ }
+ if (Subtarget->hasMatMulInt8()) {
+ if (VT.getVectorElementType() == MVT::i32)
+ setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
- } else if (VT.getVectorElementType() == MVT::i16) {
- setPartialReduceMLAAction(MLAOps, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
- }
-
- if (Subtarget->hasMatMulInt8()) {
- if (VT.getVectorElementType() == MVT::i32)
- setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 4),
- Custom);
- else if (VT.getVectorElementType() == MVT::i64)
- setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 8),
- Custom);
- }
+ else if (VT.getVectorElementType() == MVT::i64)
+ setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
}
// Lower fixed length vector operations to scalable equivalents.
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index 0c7b3c7d3c138..0ea80a075fae9 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -1,15 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM
-; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM,CHECK-NODOT
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NEWLOWERING-I8MM
+; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NODOT
+; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-DOT
+; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-DOT-I8MM
define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-DOT-LABEL: udot:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: udot v0.4s, v2.16b, v1.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: umull v3.8h, v2.8b, v1.8b
@@ -19,6 +13,16 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <16 x i8> %u to <16 x i32>
%s.wide = zext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -27,22 +31,6 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
-; CHECK-DOT-LABEL: udot_in_loop:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
-; CHECK-DOT-NEXT: .LBB1_1: // %vector.body
-; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
-; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
-; CHECK-DOT-NEXT: udot v1.4s, v2.16b, v3.16b
-; CHECK-DOT-NEXT: cmp x8, #16
-; CHECK-DOT-NEXT: b.ne .LBB1_1
-; CHECK-DOT-NEXT: // %bb.2: // %end
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
@@ -63,6 +51,38 @@ define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
; CHECK-NODOT-NEXT: b.ne .LBB1_1
; CHECK-NODOT-NEXT: // %bb.2: // %end
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB1_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: udot v1.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: b.ne .LBB1_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_in_loop:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB1_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: udot v1.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB1_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
@@ -86,11 +106,6 @@ end:
}
define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
-; CHECK-DOT-LABEL: udot_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: udot v0.2s, v2.8b, v1.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: umull v1.8h, v2.8b, v1.8b
@@ -105,6 +120,16 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: udot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: udot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <8 x i8> %u to <8 x i32>
%s.wide = zext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -113,11 +138,6 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
}
define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-DOT-LABEL: sdot:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: sdot v0.4s, v2.16b, v1.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: smull v3.8h, v2.8b, v1.8b
@@ -127,6 +147,16 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = sext <16 x i8> %u to <16 x i32>
%s.wide = sext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -135,11 +165,6 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
-; CHECK-DOT-LABEL: sdot_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: sdot v0.2s, v2.8b, v1.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: smull v1.8h, v2.8b, v1.8b
@@ -154,6 +179,16 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sdot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: sdot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = sext <8 x i8> %u to <8 x i32>
%s.wide = sext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -162,27 +197,34 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
}
define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-NOI8MM-LABEL: usdot:
-; CHECK-NOI8MM: // %bb.0:
-; CHECK-NOI8MM-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v4.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v4.8h, v3.8h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot:
+; CHECK-NODOT: // %bb.0:
+; CHECK-NODOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <16 x i8> %u to <16 x i32>
%s.wide = sext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -191,60 +233,67 @@ define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
-; CHECK-NOI8MM-LABEL: usdot_in_loop:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NOI8MM-NEXT: mov x8, xzr
-; CHECK-NOI8MM-NEXT: .LBB6_1: // %vector.body
-; CHECK-NOI8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NOI8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NOI8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NOI8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-NOI8MM-NEXT: add x8, x8, #16
-; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll v5.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: cmp x8, #16
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v4.4h, v5.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v4.8h, v5.8h
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v2.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v2.8h, v3.8h
-; CHECK-NOI8MM-NEXT: b.ne .LBB6_1
-; CHECK-NOI8MM-NEXT: // %bb.2: // %end
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot_in_loop:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: .LBB6_1: // %vector.body
+; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
+; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: cmp x8, #16
+; CHECK-NODOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-NODOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-NODOT-NEXT: b.ne .LBB6_1
+; CHECK-NODOT-NEXT: // %bb.2: // %end
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot_in_loop:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-I8MM-NEXT: mov x8, xzr
-; CHECK-I8MM-NEXT: .LBB6_1: // %vector.body
-; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-I8MM-NEXT: add x8, x8, #16
-; CHECK-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
-; CHECK-I8MM-NEXT: cmp x8, #16
-; CHECK-I8MM-NEXT: b.ne .LBB6_1
-; CHECK-I8MM-NEXT: // %bb.2: // %end
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB6_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-DOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-DOT-NEXT: b.ne .LBB6_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot_in_loop:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: mov x8, xzr
-; CHECK-NEWLOWERING-I8MM-NEXT: .LBB6_1: // %vector.body
-; CHECK-NEWLOWERING-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: add x8, x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: cmp x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: b.ne .LBB6_1
-; CHECK-NEWLOWERING-I8MM-NEXT: // %bb.2: // %end
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot_in_loop:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB6_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB6_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
@@ -268,32 +317,44 @@ end:
}
define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
-; CHECK-NOI8MM-LABEL: usdot_narrow:
-; CHECK-NOI8MM: // %bb.0:
-; CHECK-NOI8MM-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NOI8MM-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NOI8MM-NEXT: smull v3.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NOI8MM-NEXT: ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-NOI8MM-NEXT: smull2 v1.4s, v2.8h, v1.8h
-; CHECK-NOI8MM-NEXT: ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-NOI8MM-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NOI8MM-NEXT: add v0.2s, v3.2s, v0.2s
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v5.4h, v4.4h
-; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot_narrow:
+; CHECK-NODOT: // %bb.0:
+; CHECK-NODOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NODOT-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NODOT-NEXT: smull v3.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NODOT-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-NODOT-NEXT: smull2 v1.4s, v2.8h, v1.8h
+; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-NODOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-NODOT-NEXT: smlal v0.4s, v5.4h, v4.4h
+; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot_narrow:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: usdot v0.2s, v1.8b, v2.8b
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-DOT-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-DOT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-DOT-NEXT: smull v3.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-DOT-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-DOT-NEXT: smull2 v1.4s, v2.8h, v1.8h
+; CHECK-DOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-DOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-DOT-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-DOT-NEXT: smlal v0.4s, v5.4h, v4.4h
+; CHECK-DOT-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot_narrow:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.2s, v1.8b, v2.8b
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v1.8b, v2.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <8 x i8> %u to <8 x i32>
%s.wide = sext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -302,27 +363,34 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
}
define <4 x i32> @sudot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) #0{
-; CHECK-NOI8MM-LABEL: sudot:
-; CHECK-NOI8MM: // %bb.0:
-; CHECK-NOI8MM-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-NOI8MM-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v1.8h, v1.16b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v4.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v4.8h, v3.8h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: sudot:
+; CHECK-NODOT: // %bb.0:
+; CHECK-NODOT-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sudot:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: usdot v0.4s, v2.16b, v1.16b
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sudot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-DOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-DOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sudot:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.4s, v2.16b, v1.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sudot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: ret
%s.wide = sext <16 x i8> %u to <16 x i32>
%u.wide = zext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %u.wide, %s.wide
@@ -331,60 +399,67 @@ define <4 x i32> @sudot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) #0{
}
define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){
-; CHECK-NOI8MM-LABEL: sudot_in_loop:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NOI8MM-NEXT: mov x8, xzr
-; CHECK-NOI8MM-NEXT: .LBB9_1: // %vector.body
-; CHECK-NOI8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NOI8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NOI8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NOI8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-NOI8MM-NEXT: add x8, x8, #16
-; CHECK-NOI8MM-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: sshll v5.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: cmp x8, #16
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v4.4h, v5.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v4.8h, v5.8h
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v2.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v2.8h, v3.8h
-; CHECK-NOI8MM-NEXT: b.ne .LBB9_1
-; CHECK-NOI8MM-NEXT: // %bb.2: // %end
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: sudot_in_loop:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: .LBB9_1: // %vector.body
+; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
+; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: sshll v5.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: cmp x8, #16
+; CHECK-NODOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-NODOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-NODOT-NEXT: b.ne .LBB9_1
+; CHECK-NODOT-NEXT: // %bb.2: // %end
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sudot_in_loop:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-I8MM-NEXT: mov x8, xzr
-; CHECK-I8MM-NEXT: .LBB9_1: // %vector.body
-; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-I8MM-NEXT: add x8, x8, #16
-; CHECK-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: cmp x8, #16
-; CHECK-I8MM-NEXT: b.ne .LBB9_1
-; CHECK-I8MM-NEXT: // %bb.2: // %end
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sudot_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB9_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: sshll v5.8h, v3.8b, #0
+; CHECK-DOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-DOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-DOT-NEXT: b.ne .LBB9_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sudot_in_loop:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: mov x8, xzr
-; CHECK-NEWLOWERING-I8MM-NEXT: .LBB9_1: // %vector.body
-; CHECK-NEWLOWERING-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: add x8, x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: cmp x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: b.ne .LBB9_1
-; CHECK-NEWLOWERING-I8MM-NEXT: // %bb.2: // %end
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sudot_in_loop:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB9_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB9_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
@@ -408,32 +483,44 @@ end:
}
define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
-; CHECK-NOI8MM-LABEL: sudot_narrow:
-; CHECK-NOI8MM: // %bb.0:
-; CHECK-NOI8MM-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NOI8MM-NEXT: ushll v2.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NOI8MM-NEXT: smull v3.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NOI8MM-NEXT: ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-NOI8MM-NEXT: smull2 v1.4s, v2.8h, v1.8h
-; CHECK-NOI8MM-NEXT: ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-NOI8MM-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NOI8MM-NEXT: add v0.2s, v3.2s, v0.2s
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v5.4h, v4.4h
-; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: sudot_narrow:
+; CHECK-NODOT: // %bb.0:
+; CHECK-NODOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NODOT-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NODOT-NEXT: smull v3.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NODOT-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-NODOT-NEXT: smull2 v1.4s, v2.8h, v1.8h
+; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-NODOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-NODOT-NEXT: smlal v0.4s, v5.4h, v4.4h
+; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sudot_narrow:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: usdot v0.2s, v2.8b, v1.8b
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sudot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-DOT-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-DOT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-DOT-NEXT: smull v3.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-DOT-NEXT: ext v5.16b, v2.16b, v2.16b, #8
+; CHECK-DOT-NEXT: smull2 v1.4s, v2.8h, v1.8h
+; CHECK-DOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-DOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-DOT-NEXT: add v0.2s, v3.2s, v0.2s
+; CHECK-DOT-NEXT: smlal v0.4s, v5.4h, v4.4h
+; CHECK-DOT-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sudot_narrow:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.2s, v2.8b, v1.8b
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sudot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = sext <8 x i8> %u to <8 x i32>
%s.wide = zext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -460,21 +547,21 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: udot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: udot_8to64:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: udot_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: udot_8to64:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: uaddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
entry:
%a.wide = zext <16 x i8> %a to <16 x i64>
%b.wide = zext <16 x i8> %b to <16 x i64>
@@ -503,21 +590,21 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sdot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sdot_8to64:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sdot_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sdot_8to64:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
entry:
%a.wide = sext <16 x i8> %a to <16 x i64>
%b.wide = sext <16 x i8> %b to <16 x i64>
@@ -528,45 +615,61 @@ entry:
}
define <4 x i64> @usdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
-; CHECK-NOI8MM-LABEL: usdot_8to64:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: sshll v5.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: ushll v6.4s, v4.4h, #0
-; CHECK-NOI8MM-NEXT: ushll v7.4s, v2.4h, #0
-; CHECK-NOI8MM-NEXT: sshll v16.4s, v5.4h, #0
-; CHECK-NOI8MM-NEXT: sshll v17.4s, v3.4h, #0
-; CHECK-NOI8MM-NEXT: ushll2 v4.4s, v4.8h, #0
-; CHECK-NOI8MM-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-NOI8MM-NEXT: sshll2 v5.4s, v5.8h, #0
-; CHECK-NOI8MM-NEXT: sshll2 v3.4s, v3.8h, #0
-; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v16.2s
-; CHECK-NOI8MM-NEXT: smlal v1.2d, v7.2s, v17.2s
-; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v16.4s
-; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v7.4s, v17.4s
-; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
-; CHECK-NOI8MM-NEXT: smlal v1.2d, v2.2s, v3.2s
-; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
-; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot_8to64:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: sshll v5.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: ushll v6.4s, v4.4h, #0
+; CHECK-NODOT-NEXT: ushll v7.4s, v2.4h, #0
+; CHECK-NODOT-NEXT: sshll v16.4s, v5.4h, #0
+; CHECK-NODOT-NEXT: sshll v17.4s, v3.4h, #0
+; CHECK-NODOT-NEXT: ushll2 v4.4s, v4.8h, #0
+; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-NODOT-NEXT: sshll2 v5.4s, v5.8h, #0
+; CHECK-NODOT-NEXT: sshll2 v3.4s, v3.8h, #0
+; CHECK-NODOT-NEXT: smlal v0.2d, v6.2s, v16.2s
+; CHECK-NODOT-NEXT: smlal v1.2d, v7.2s, v17.2s
+; CHECK-NODOT-NEXT: smlal2 v0.2d, v6.4s, v16.4s
+; CHECK-NODOT-NEXT: smlal2 v1.2d, v7.4s, v17.4s
+; CHECK-NODOT-NEXT: smlal v0.2d, v4.2s, v5.2s
+; CHECK-NODOT-NEXT: smlal v1.2d, v2.2s, v3.2s
+; CHECK-NODOT-NEXT: smlal2 v0.2d, v4.4s, v5.4s
+; CHECK-NODOT-NEXT: smlal2 v1.2d, v2.4s, v3.4s
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: usdot v4.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot_8to64:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: sshll v5.8h, v3.8b, #0
+; CHECK-DOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: ushll v6.4s, v4.4h, #0
+; CHECK-DOT-NEXT: ushll v7.4s, v2.4h, #0
+; CHECK-DOT-NEXT: sshll v16.4s, v5.4h, #0
+; CHECK-DOT-NEXT: sshll v17.4s, v3.4h, #0
+; CHECK-DOT-NEXT: ushll2 v4.4s, v4.8h, #0
+; CHECK-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-DOT-NEXT: sshll2 v5.4s, v5.8h, #0
+; CHECK-DOT-NEXT: sshll2 v3.4s, v3.8h, #0
+; CHECK-DOT-NEXT: smlal v0.2d, v6.2s, v16.2s
+; CHECK-DOT-NEXT: smlal v1.2d, v7.2s, v17.2s
+; CHECK-DOT-NEXT: smlal2 v0.2d, v6.4s, v16.4s
+; CHECK-DOT-NEXT: smlal2 v1.2d, v7.4s, v17.4s
+; CHECK-DOT-NEXT: smlal v0.2d, v4.2s, v5.2s
+; CHECK-DOT-NEXT: smlal v1.2d, v2.2s, v3.2s
+; CHECK-DOT-NEXT: smlal2 v0.2d, v4.4s, v5.4s
+; CHECK-DOT-NEXT: smlal2 v1.2d, v2.4s, v3.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v4.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot_8to64:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
entry:
%a.wide = zext <16 x i8> %a to <16 x i64>
%b.wide = sext <16 x i8> %b to <16 x i64>
@@ -577,45 +680,61 @@ entry:
}
define <4 x i64> @sudot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
-; CHECK-NOI8MM-LABEL: sudot_8to64:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: ushll v5.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: sshll v6.4s, v4.4h, #0
-; CHECK-NOI8MM-NEXT: sshll v7.4s, v2.4h, #0
-; CHECK-NOI8MM-NEXT: ushll v16.4s, v5.4h, #0
-; CHECK-NOI8MM-NEXT: ushll v17.4s, v3.4h, #0
-; CHECK-NOI8MM-NEXT: sshll2 v4.4s, v4.8h, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-NOI8MM-NEXT: ushll2 v5.4s, v5.8h, #0
-; CHECK-NOI8MM-NEXT: ushll2 v3.4s, v3.8h, #0
-; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v16.2s
-; CHECK-NOI8MM-NEXT: smlal v1.2d, v7.2s, v17.2s
-; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v16.4s
-; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v7.4s, v17.4s
-; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
-; CHECK-NOI8MM-NEXT: smlal v1.2d, v2.2s, v3.2s
-; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
-; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: sudot_8to64:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: sshll v6.4s, v4.4h, #0
+; CHECK-NODOT-NEXT: sshll v7.4s, v2.4h, #0
+; CHECK-NODOT-NEXT: ushll v16.4s, v5.4h, #0
+; CHECK-NODOT-NEXT: ushll v17.4s, v3.4h, #0
+; CHECK-NODOT-NEXT: sshll2 v4.4s, v4.8h, #0
+; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-NODOT-NEXT: ushll2 v5.4s, v5.8h, #0
+; CHECK-NODOT-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-NODOT-NEXT: smlal v0.2d, v6.2s, v16.2s
+; CHECK-NODOT-NEXT: smlal v1.2d, v7.2s, v17.2s
+; CHECK-NODOT-NEXT: smlal2 v0.2d, v6.4s, v16.4s
+; CHECK-NODOT-NEXT: smlal2 v1.2d, v7.4s, v17.4s
+; CHECK-NODOT-NEXT: smlal v0.2d, v4.2s, v5.2s
+; CHECK-NODOT-NEXT: smlal v1.2d, v2.2s, v3.2s
+; CHECK-NODOT-NEXT: smlal2 v0.2d, v4.4s, v5.4s
+; CHECK-NODOT-NEXT: smlal2 v1.2d, v2.4s, v3.4s
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sudot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: usdot v4.4s, v3.16b, v2.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sudot_8to64:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-DOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: sshll v6.4s, v4.4h, #0
+; CHECK-DOT-NEXT: sshll v7.4s, v2.4h, #0
+; CHECK-DOT-NEXT: ushll v16.4s, v5.4h, #0
+; CHECK-DOT-NEXT: ushll v17.4s, v3.4h, #0
+; CHECK-DOT-NEXT: sshll2 v4.4s, v4.8h, #0
+; CHECK-DOT-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-DOT-NEXT: ushll2 v5.4s, v5.8h, #0
+; CHECK-DOT-NEXT: ushll2 v3.4s, v3.8h, #0
+; CHECK-DOT-NEXT: smlal v0.2d, v6.2s, v16.2s
+; CHECK-DOT-NEXT: smlal v1.2d, v7.2s, v17.2s
+; CHECK-DOT-NEXT: smlal2 v0.2d, v6.4s, v16.4s
+; CHECK-DOT-NEXT: smlal2 v1.2d, v7.4s, v17.4s
+; CHECK-DOT-NEXT: smlal v0.2d, v4.2s, v5.2s
+; CHECK-DOT-NEXT: smlal v1.2d, v2.2s, v3.2s
+; CHECK-DOT-NEXT: smlal2 v0.2d, v4.4s, v5.4s
+; CHECK-DOT-NEXT: smlal2 v1.2d, v2.4s, v3.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sudot_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v4.4s, v3.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sudot_8to64:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v3.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
entry:
%a.wide = sext <16 x i8> %a to <16 x i64>
%b.wide = zext <16 x i8> %b to <16 x i64>
@@ -626,12 +745,6 @@ entry:
}
define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
-; CHECK-DOT-LABEL: udot_no_bin_op:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: movi v2.16b, #1
-; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_no_bin_op:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: ushll v2.8h, v1.8b, #0
@@ -641,77 +754,53 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_no_bin_op:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v2.16b, #1
+; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_no_bin_op:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
+; CHECK-DOT-I8MM-NEXT: udot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = zext <16 x i8> %a to <16 x i32>
%partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide)
ret <4 x i32> %partial.reduce
}
define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
-; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
-; CHECK-NODOT: // %bb.0: // %entry
-; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
-; CHECK-NODOT-NEXT: .LBB16_1: // %vector.body
-; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
-; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
-; CHECK-NODOT-NEXT: add x8, x8, #16
-; CHECK-NODOT-NEXT: cmp x8, #16
-; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
-; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v3.4h
-; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
-; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v2.4h
-; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
-; CHECK-NODOT-NEXT: b.ne .LBB16_1
-; CHECK-NODOT-NEXT: // %bb.2: // %end
-; CHECK-NODOT-NEXT: ret
-;
-; CHECK-I8MM-LABEL: udot_no_bin_op_in_loop:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-I8MM-NEXT: movi v2.16b, #1
-; CHECK-I8MM-NEXT: mov x8, xzr
-; CHECK-I8MM-NEXT: .LBB16_1: // %vector.body
-; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-I8MM-NEXT: ldr q3, [x0, x8]
-; CHECK-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-I8MM-NEXT: add x8, x8, #16
-; CHECK-I8MM-NEXT: cmp x8, #16
-; CHECK-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b
-; CHECK-I8MM-NEXT: b.ne .LBB16_1
-; CHECK-I8MM-NEXT: // %bb.2: // %end
-; CHECK-I8MM-NEXT: ret
-;
-; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_in_loop:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: adrp x8, .LCPI16_0
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: adrp x9, .LCPI16_2
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-NEWLOWERING-I8MM-NEXT: adrp x8, .LCPI16_1
-; CHECK-NEWLOWERING-I8MM-NEXT: adrp x10, .LCPI16_3
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q3, [x8, :lo12:.LCPI16_1]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q4, [x9, :lo12:.LCPI16_2]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q5, [x10, :lo12:.LCPI16_3]
-; CHECK-NEWLOWERING-I8MM-NEXT: mov x8, xzr
-; CHECK-NEWLOWERING-I8MM-NEXT: .LBB16_1: // %vector.body
-; CHECK-NEWLOWERING-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q6, [x0, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: mov v0.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: add x8, x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: cmp x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: tbl v7.16b, { v6.16b }, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: tbl v16.16b, { v6.16b }, v4.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: tbl v17.16b, { v6.16b }, v5.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: tbl v6.16b, { v6.16b }, v1.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: add v2.4s, v2.4s, v17.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: add v7.4s, v16.4s, v7.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: add v2.4s, v2.4s, v7.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: b.ne .LBB16_1
-; CHECK-NEWLOWERING-I8MM-NEXT: // %bb.2: // %end
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-COMMON-LABEL: udot_no_bin_op_in_loop:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: adrp x8, .LCPI16_0
+; CHECK-COMMON-NEXT: movi v2.2d, #0000000000000000
+; CHECK-COMMON-NEXT: adrp x9, .LCPI16_2
+; CHECK-COMMON-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-COMMON-NEXT: adrp x8, .LCPI16_1
+; CHECK-COMMON-NEXT: adrp x10, .LCPI16_3
+; CHECK-COMMON-NEXT: ldr q3, [x8, :lo12:.LCPI16_1]
+; CHECK-COMMON-NEXT: ldr q4, [x9, :lo12:.LCPI16_2]
+; CHECK-COMMON-NEXT: ldr q5, [x10, :lo12:.LCPI16_3]
+; CHECK-COMMON-NEXT: mov x8, xzr
+; CHECK-COMMON-NEXT: .LBB16_1: // %vector.body
+; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-COMMON-NEXT: ldr q6, [x0, x8]
+; CHECK-COMMON-NEXT: mov v0.16b, v2.16b
+; CHECK-COMMON-NEXT: add x8, x8, #16
+; CHECK-COMMON-NEXT: cmp x8, #16
+; CHECK-COMMON-NEXT: tbl v7.16b, { v6.16b }, v3.16b
+; CHECK-COMMON-NEXT: tbl v16.16b, { v6.16b }, v4.16b
+; CHECK-COMMON-NEXT: tbl v17.16b, { v6.16b }, v5.16b
+; CHECK-COMMON-NEXT: tbl v6.16b, { v6.16b }, v1.16b
+; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v17.4s
+; CHECK-COMMON-NEXT: add v7.4s, v16.4s, v7.4s
+; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v7.4s
+; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-COMMON-NEXT: b.ne .LBB16_1
+; CHECK-COMMON-NEXT: // %bb.2: // %end
+; CHECK-COMMON-NEXT: ret
entry:
br label %vector.body
@@ -731,12 +820,6 @@ end:
}
define <4 x i32> @sdot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
-; CHECK-DOT-LABEL: sdot_no_bin_op:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: movi v2.16b, #1
-; CHECK-DOT-NEXT: sdot v0.4s, v1.16b, v2.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot_no_bin_op:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: sshll v2.8h, v1.8b, #0
@@ -746,18 +829,24 @@ define <4 x i32> @sdot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot_no_bin_op:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v2.16b, #1
+; CHECK-DOT-NEXT: sdot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot_no_bin_op:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
+; CHECK-DOT-I8MM-NEXT: sdot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = sext <16 x i8> %a to <16 x i32>
%partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide)
ret <4 x i32> %partial.reduce
}
define <2 x i32> @udot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
-; CHECK-DOT-LABEL: udot_no_bin_op_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: udot v0.2s, v1.8b, v2.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_no_bin_op_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: ushll v1.8h, v1.8b, #0
@@ -772,18 +861,24 @@ define <2 x i32> @udot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_no_bin_op_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: udot v0.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v2.8b, #1
+; CHECK-DOT-I8MM-NEXT: udot v0.2s, v1.8b, v2.8b
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = zext <8 x i8> %a to <8 x i32>
%partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide)
ret <2 x i32> %partial.reduce
}
define <2 x i32> @sdot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
-; CHECK-DOT-LABEL: sdot_no_bin_op_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: sdot v0.2s, v1.8b, v2.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot_no_bin_op_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: sshll v1.8h, v1.8b, #0
@@ -798,6 +893,18 @@ define <2 x i32> @sdot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot_no_bin_op_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: sdot v0.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot_no_bin_op_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v2.8b, #1
+; CHECK-DOT-I8MM-NEXT: sdot v0.2s, v1.8b, v2.8b
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = sext <8 x i8> %a to <8 x i32>
%partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide)
ret <2 x i32> %partial.reduce
@@ -822,23 +929,23 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v3.4s
; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: udot_no_bin_op_8to64:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: movi v3.16b, #1
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: udot_no_bin_op_8to64:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v3.16b, #1
+; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-NEXT: udot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v3.16b, #1
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_8to64:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: uaddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = zext <16 x i8> %a to <16 x i64>
%partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide)
ret <4 x i64> %partial.reduce
@@ -863,35 +970,35 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v3.4s
; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: sdot_no_bin_op_8to64:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: movi v3.16b, #1
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
-; CHECK-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
-; CHECK-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: sdot_no_bin_op_8to64:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: movi v3.16b, #1
+; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-NEXT: sdot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: sdot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v3.16b, #1
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
-; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: sdot_no_bin_op_8to64:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1
+; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
+; CHECK-DOT-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
+; CHECK-DOT-I8MM-NEXT: ret
%a.wide = sext <16 x i8> %a to <16 x i64>
%partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide)
ret <4 x i64> %partial.reduce
}
define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
-; CHECK-LABEL: not_udot:
-; CHECK: // %bb.0:
-; CHECK-NEXT: umull v1.8h, v2.8b, v1.8b
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
-; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: not_udot:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: umull v1.8h, v2.8b, v1.8b
+; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-COMMON-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
+; CHECK-COMMON-NEXT: ret
%u.wide = zext <8 x i8> %u to <8 x i32>
%s.wide = zext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -900,16 +1007,16 @@ define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
}
define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) {
-; CHECK-LABEL: not_udot_narrow:
-; CHECK: // %bb.0:
-; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: bic v2.4h, #255, lsl #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umull v3.4s, v2.4h, v1.4h
-; CHECK-NEXT: umlal v0.4s, v2.4h, v1.4h
-; CHECK-NEXT: ext v1.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: add v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: not_udot_narrow:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-COMMON-NEXT: bic v2.4h, #255, lsl #8
+; CHECK-COMMON-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-COMMON-NEXT: umull v3.4s, v2.4h, v1.4h
+; CHECK-COMMON-NEXT: umlal v0.4s, v2.4h, v1.4h
+; CHECK-COMMON-NEXT: ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-COMMON-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-COMMON-NEXT: ret
%u.wide = zext <4 x i8> %u to <4 x i32>
%s.wide = zext <4 x i8> %s to <4 x i32>
%mult = mul nuw nsw <4 x i32> %s.wide, %u.wide
@@ -918,18 +1025,18 @@ define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) {
}
define <2 x i64> @udot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
-; CHECK-LABEL: udot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v2.8h, v2.8b, #0
-; CHECK-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: umlal v0.2d, v3.2s, v4.2s
-; CHECK-NEXT: umlal2 v0.2d, v3.4s, v4.4s
-; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
-; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: udot_different_types:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-COMMON-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-COMMON-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-COMMON-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-COMMON-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-COMMON-NEXT: umlal v0.2d, v3.2s, v4.2s
+; CHECK-COMMON-NEXT: umlal2 v0.2d, v3.4s, v4.4s
+; CHECK-COMMON-NEXT: umlal v0.2d, v1.2s, v2.2s
+; CHECK-COMMON-NEXT: umlal2 v0.2d, v1.4s, v2.4s
+; CHECK-COMMON-NEXT: ret
entry:
%a.wide = zext <8 x i16> %a to <8 x i64>
%b.wide = zext <8 x i8> %b to <8 x i64>
@@ -939,18 +1046,18 @@ entry:
}
define <2 x i64> @sdot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
-; CHECK-LABEL: sdot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
-; CHECK-NEXT: smlal2 v0.2d, v3.4s, v4.4s
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: sdot_different_types:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-COMMON-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-COMMON-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-COMMON-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-COMMON-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-COMMON-NEXT: smlal v0.2d, v3.2s, v4.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v3.4s, v4.4s
+; CHECK-COMMON-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v1.4s, v2.4s
+; CHECK-COMMON-NEXT: ret
entry:
%a.wide = sext <8 x i16> %a to <8 x i64>
%b.wide = sext <8 x i8> %b to <8 x i64>
@@ -960,18 +1067,18 @@ entry:
}
define <2 x i64> @usdot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
-; CHECK-LABEL: usdot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: sshll v4.4s, v2.4h, #0
-; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
-; CHECK-NEXT: smlal2 v0.2d, v3.4s, v4.4s
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: usdot_different_types:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-COMMON-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-COMMON-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-COMMON-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-COMMON-NEXT: sshll2 v2.4s, v2.8h, #0
+; CHECK-COMMON-NEXT: smlal v0.2d, v3.2s, v4.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v3.4s, v4.4s
+; CHECK-COMMON-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v1.4s, v2.4s
+; CHECK-COMMON-NEXT: ret
entry:
%a.wide = zext <8 x i16> %a to <8 x i64>
%b.wide = sext <8 x i8> %b to <8 x i64>
@@ -981,18 +1088,18 @@ entry:
}
define <2 x i64> @sudot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
-; CHECK-LABEL: sudot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v2.8h, v2.8b, #0
-; CHECK-NEXT: sshll v3.4s, v1.4h, #0
-; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
-; CHECK-NEXT: smlal2 v0.2d, v3.4s, v4.4s
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: sudot_different_types:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-COMMON-NEXT: sshll v3.4s, v1.4h, #0
+; CHECK-COMMON-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-COMMON-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-COMMON-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-COMMON-NEXT: smlal v0.2d, v3.2s, v4.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v3.4s, v4.4s
+; CHECK-COMMON-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-COMMON-NEXT: smlal2 v0.2d, v1.4s, v2.4s
+; CHECK-COMMON-NEXT: ret
entry:
%a.wide = sext <8 x i16> %a to <8 x i64>
%b.wide = zext <8 x i8> %b to <8 x i64>
@@ -1002,74 +1109,86 @@ entry:
}
define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) {
-; CHECK-NOI8MM-LABEL: usdot_multiple_zext_users:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOI8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NOI8MM-NEXT: mov x8, xzr
-; CHECK-NOI8MM-NEXT: .LBB28_1: // %vector.body
-; CHECK-NOI8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NOI8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NOI8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NOI8MM-NEXT: ldr q4, [x2, x8]
-; CHECK-NOI8MM-NEXT: add x8, x8, #16
-; CHECK-NOI8MM-NEXT: sshll v5.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll v6.8h, v4.8b, #0
-; CHECK-NOI8MM-NEXT: sshll v7.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v4.8h, v4.16b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: cmp x8, #1024
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v5.4h, v6.4h
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v7.4h, v6.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v5.8h, v6.8h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v7.8h, v6.8h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v4.4h
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v3.4h, v4.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v4.8h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v3.8h, v4.8h
-; CHECK-NOI8MM-NEXT: b.ne .LBB28_1
-; CHECK-NOI8MM-NEXT: // %bb.2: // %end
-; CHECK-NOI8MM-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot_multiple_zext_users:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: .LBB28_1: // %vector.body
+; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: ldr q4, [x2, x8]
+; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: sshll v5.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll v6.8h, v4.8b, #0
+; CHECK-NODOT-NEXT: sshll v7.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: ushll2 v4.8h, v4.16b, #0
+; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: cmp x8, #1024
+; CHECK-NODOT-NEXT: smlal v0.4s, v5.4h, v6.4h
+; CHECK-NODOT-NEXT: smlal v1.4s, v7.4h, v6.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v5.8h, v6.8h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v7.8h, v6.8h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v4.4h
+; CHECK-NODOT-NEXT: smlal v1.4s, v3.4h, v4.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v4.8h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v3.8h, v4.8h
+; CHECK-NODOT-NEXT: b.ne .LBB28_1
+; CHECK-NODOT-NEXT: // %bb.2: // %end
+; CHECK-NODOT-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot_multiple_zext_users:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v0.2d, #0000000000000000
-; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-I8MM-NEXT: mov x8, xzr
-; CHECK-I8MM-NEXT: .LBB28_1: // %vector.body
-; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-I8MM-NEXT: ldr q4, [x2, x8]
-; CHECK-I8MM-NEXT: add x8, x8, #16
-; CHECK-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
-; CHECK-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
-; CHECK-I8MM-NEXT: cmp x8, #1024
-; CHECK-I8MM-NEXT: b.ne .LBB28_1
-; CHECK-I8MM-NEXT: // %bb.2: // %end
-; CHECK-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot_multiple_zext_users:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v0.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB28_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: ldr q4, [x2, x8]
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: sshll v5.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll v6.8h, v4.8b, #0
+; CHECK-DOT-NEXT: sshll v7.8h, v3.8b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: ushll2 v4.8h, v4.16b, #0
+; CHECK-DOT-NEXT: sshll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: cmp x8, #1024
+; CHECK-DOT-NEXT: smlal v0.4s, v5.4h, v6.4h
+; CHECK-DOT-NEXT: smlal v1.4s, v7.4h, v6.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v5.8h, v6.8h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v7.8h, v6.8h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v4.4h
+; CHECK-DOT-NEXT: smlal v1.4s, v3.4h, v4.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v2.8h, v4.8h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v3.8h, v4.8h
+; CHECK-DOT-NEXT: b.ne .LBB28_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot_multiple_zext_users:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEWLOWERING-I8MM-NEXT: mov x8, xzr
-; CHECK-NEWLOWERING-I8MM-NEXT: .LBB28_1: // %vector.body
-; CHECK-NEWLOWERING-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: ldr q4, [x2, x8]
-; CHECK-NEWLOWERING-I8MM-NEXT: add x8, x8, #16
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: cmp x8, #1024
-; CHECK-NEWLOWERING-I8MM-NEXT: b.ne .LBB28_1
-; CHECK-NEWLOWERING-I8MM-NEXT: // %bb.2: // %end
-; CHECK-NEWLOWERING-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot_multiple_zext_users:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v0.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB28_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q4, [x2, x8]
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: cmp x8, #1024
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB28_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
@@ -1100,15 +1219,15 @@ end:
}
define <2 x i64> @udot_16to64(<2 x i64> %acc, <8 x i16> %input){
-; CHECK-LABEL: udot_16to64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
-; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v2.4s
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
-; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: udot_16to64:
+; CHECK-COMMON: // %bb.0: // %entry
+; CHECK-COMMON-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-COMMON-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-COMMON-NEXT: uaddw v0.2d, v0.2d, v2.2s
+; CHECK-COMMON-NEXT: uaddw2 v0.2d, v0.2d, v2.4s
+; CHECK-COMMON-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-COMMON-NEXT: uaddw2 v0.2d, v0.2d, v1.4s
+; CHECK-COMMON-NEXT: ret
entry:
%input.wide = zext <8 x i16> %input to <8 x i64>
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %input.wide)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index af813ff16a202..33d5ac4cd299e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,NEON
-; RUN: llc -mattr=+sve,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,SVE
-; RUN: llc -mattr=+sme,+i8mm -aarch64-enable-partial-reduce-nodes=true -force-streaming < %s | FileCheck %s --check-prefix=SME
+; RUN: llc -mattr=+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=COMMON,NEON
+; RUN: llc -mattr=+sve,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=COMMON,SVE
+; RUN: llc -mattr=+sme,+i8mm -force-streaming < %s | FileCheck %s --check-prefix=SME
target triple = "aarch64"
diff --git a/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
index 221a15e5c8fe6..b2cde51e99619 100644
--- a/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll
@@ -1,20 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-I8MM
-; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
-; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
-; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
-; RUN: llc -mtriple=aarch64 -mattr=+sve,+sme,+i8mm -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefix=CHECK-SVE2
+; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm %s -o - | FileCheck %s --check-prefix=CHECK-SVE2-I8MM
+; RUN: llc -mtriple=aarch64 -mattr=+sve2,+sme,+i8mm -force-streaming %s -o - | FileCheck %s --check-prefix=CHECK-SME
define <vscale x 4 x i32> @udot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: udot:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: udot z0.s, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: udot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i32>
@@ -24,15 +27,20 @@ entry:
}
define <vscale x 2 x i64> @udot_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: udot_wide:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: udot z0.d, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_wide:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: udot_wide:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_wide:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: udot_wide:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = zext <vscale x 8 x i16> %b to <vscale x 8 x i64>
@@ -42,15 +50,20 @@ entry:
}
define <vscale x 4 x i32> @sdot(<vscale x 4 x i32> %accc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sdot:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sdot z0.s, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sdot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: sdot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i32>
@@ -60,15 +73,20 @@ entry:
}
define <vscale x 2 x i64> @sdot_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sdot_wide:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sdot z0.d, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_wide:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_wide:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_wide:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_wide:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = sext <vscale x 8 x i16> %b to <vscale x 8 x i64>
@@ -78,36 +96,36 @@ entry:
}
define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-I8MM-LABEL: usdot:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: usdot z0.s, z1.b, z2.b
-; CHECK-I8MM-NEXT: ret
+; CHECK-SVE2-LABEL: usdot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uunpklo z3.h, z1.b
+; CHECK-SVE2-NEXT: sunpklo z4.h, z2.b
+; CHECK-SVE2-NEXT: ptrue p0.s
+; CHECK-SVE2-NEXT: uunpkhi z1.h, z1.b
+; CHECK-SVE2-NEXT: sunpkhi z2.h, z2.b
+; CHECK-SVE2-NEXT: uunpklo z5.s, z3.h
+; CHECK-SVE2-NEXT: sunpklo z6.s, z4.h
+; CHECK-SVE2-NEXT: uunpkhi z3.s, z3.h
+; CHECK-SVE2-NEXT: sunpkhi z4.s, z4.h
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z5.s, z6.s
+; CHECK-SVE2-NEXT: uunpklo z5.s, z1.h
+; CHECK-SVE2-NEXT: sunpklo z6.s, z2.h
+; CHECK-SVE2-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z3.s, z4.s
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z5.s, z6.s
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z1.s, z2.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NOI8MM-LABEL: usdot:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: uunpklo z3.h, z1.b
-; CHECK-NOI8MM-NEXT: sunpklo z4.h, z2.b
-; CHECK-NOI8MM-NEXT: ptrue p0.s
-; CHECK-NOI8MM-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NOI8MM-NEXT: sunpkhi z2.h, z2.b
-; CHECK-NOI8MM-NEXT: uunpklo z5.s, z3.h
-; CHECK-NOI8MM-NEXT: sunpklo z6.s, z4.h
-; CHECK-NOI8MM-NEXT: uunpkhi z3.s, z3.h
-; CHECK-NOI8MM-NEXT: sunpkhi z4.s, z4.h
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z5.s, z6.s
-; CHECK-NOI8MM-NEXT: uunpklo z5.s, z1.h
-; CHECK-NOI8MM-NEXT: sunpklo z6.s, z2.h
-; CHECK-NOI8MM-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NOI8MM-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z3.s, z4.s
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z5.s, z6.s
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z1.s, z2.s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: usdot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: usdot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: usdot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: usdot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: usdot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i32>
@@ -117,36 +135,36 @@ entry:
}
define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-I8MM-LABEL: sudot:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: usdot z0.s, z2.b, z1.b
-; CHECK-I8MM-NEXT: ret
+; CHECK-SVE2-LABEL: sudot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sunpklo z3.h, z1.b
+; CHECK-SVE2-NEXT: uunpklo z4.h, z2.b
+; CHECK-SVE2-NEXT: ptrue p0.s
+; CHECK-SVE2-NEXT: sunpkhi z1.h, z1.b
+; CHECK-SVE2-NEXT: uunpkhi z2.h, z2.b
+; CHECK-SVE2-NEXT: sunpklo z5.s, z3.h
+; CHECK-SVE2-NEXT: uunpklo z6.s, z4.h
+; CHECK-SVE2-NEXT: sunpkhi z3.s, z3.h
+; CHECK-SVE2-NEXT: uunpkhi z4.s, z4.h
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z5.s, z6.s
+; CHECK-SVE2-NEXT: sunpklo z5.s, z1.h
+; CHECK-SVE2-NEXT: uunpklo z6.s, z2.h
+; CHECK-SVE2-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z3.s, z4.s
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z5.s, z6.s
+; CHECK-SVE2-NEXT: mla z0.s, p0/m, z1.s, z2.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NOI8MM-LABEL: sudot:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: sunpklo z3.h, z1.b
-; CHECK-NOI8MM-NEXT: uunpklo z4.h, z2.b
-; CHECK-NOI8MM-NEXT: ptrue p0.s
-; CHECK-NOI8MM-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NOI8MM-NEXT: uunpkhi z2.h, z2.b
-; CHECK-NOI8MM-NEXT: sunpklo z5.s, z3.h
-; CHECK-NOI8MM-NEXT: uunpklo z6.s, z4.h
-; CHECK-NOI8MM-NEXT: sunpkhi z3.s, z3.h
-; CHECK-NOI8MM-NEXT: uunpkhi z4.s, z4.h
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z5.s, z6.s
-; CHECK-NOI8MM-NEXT: sunpklo z5.s, z1.h
-; CHECK-NOI8MM-NEXT: uunpklo z6.s, z2.h
-; CHECK-NOI8MM-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NOI8MM-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z3.s, z4.s
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z5.s, z6.s
-; CHECK-NOI8MM-NEXT: mla z0.s, p0/m, z1.s, z2.s
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sudot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: usdot z0.s, z2.b, z1.b
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sudot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sudot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: usdot z0.s, z2.b, z1.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i32>
@@ -156,41 +174,29 @@ entry:
}
define <vscale x 4 x i64> @udot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: udot_8to64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEXT: udot z4.s, z2.b, z3.b
-; CHECK-NEXT: sunpklo z2.d, z4.s
-; CHECK-NEXT: sunpkhi z3.d, z4.s
-; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_8to64:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-NEXT: udot z4.s, z2.b, z3.b
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: udot_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: udot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z3.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_8to64:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: udot z4.s, z2.b, z3.b
+; CHECK-SVE2-I8MM-NEXT: uaddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: uaddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: udot_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: udot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
-;
-; CHECK-NEWLOWERING-SME-LABEL: udot_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: udot_8to64:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: udot z4.s, z2.b, z3.b
+; CHECK-SME-NEXT: uaddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: uaddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
@@ -201,41 +207,29 @@ entry:
}
define <vscale x 4 x i64> @sdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
-; CHECK-LABEL: sdot_8to64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEXT: sdot z4.s, z2.b, z3.b
-; CHECK-NEXT: sunpklo z2.d, z4.s
-; CHECK-NEXT: sunpkhi z3.d, z4.s
-; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE-LABEL: sdot_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: sdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_8to64:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-NEXT: sdot z4.s, z2.b, z3.b
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: sdot_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: sdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sdot_8to64:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: sdot z4.s, z2.b, z3.b
+; CHECK-SVE2-I8MM-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SME-LABEL: sdot_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: sdot_8to64:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: sdot z4.s, z2.b, z3.b
+; CHECK-SME-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
@@ -246,82 +240,62 @@ entry:
}
define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
-; CHECK-I8MM-LABEL: usdot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: usdot z4.s, z2.b, z3.b
-; CHECK-I8MM-NEXT: sunpklo z2.d, z4.s
-; CHECK-I8MM-NEXT: sunpkhi z3.d, z4.s
-; CHECK-I8MM-NEXT: add z0.d, z0.d, z2.d
-; CHECK-I8MM-NEXT: add z1.d, z1.d, z3.d
-; CHECK-I8MM-NEXT: ret
-;
-; CHECK-NOI8MM-LABEL: usdot_8to64:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: uunpkhi z4.h, z2.b
-; CHECK-NOI8MM-NEXT: uunpklo z2.h, z2.b
-; CHECK-NOI8MM-NEXT: sunpkhi z5.h, z3.b
-; CHECK-NOI8MM-NEXT: sunpklo z3.h, z3.b
-; CHECK-NOI8MM-NEXT: ptrue p0.d
-; CHECK-NOI8MM-NEXT: uunpklo z6.s, z4.h
-; CHECK-NOI8MM-NEXT: uunpklo z7.s, z2.h
-; CHECK-NOI8MM-NEXT: sunpklo z24.s, z5.h
-; CHECK-NOI8MM-NEXT: sunpklo z25.s, z3.h
-; CHECK-NOI8MM-NEXT: uunpkhi z4.s, z4.h
-; CHECK-NOI8MM-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NOI8MM-NEXT: sunpkhi z5.s, z5.h
-; CHECK-NOI8MM-NEXT: sunpkhi z3.s, z3.h
-; CHECK-NOI8MM-NEXT: uunpklo z26.d, z6.s
-; CHECK-NOI8MM-NEXT: uunpklo z27.d, z7.s
-; CHECK-NOI8MM-NEXT: sunpklo z28.d, z24.s
-; CHECK-NOI8MM-NEXT: sunpklo z29.d, z25.s
-; CHECK-NOI8MM-NEXT: uunpkhi z6.d, z6.s
-; CHECK-NOI8MM-NEXT: uunpkhi z7.d, z7.s
-; CHECK-NOI8MM-NEXT: sunpkhi z24.d, z24.s
-; CHECK-NOI8MM-NEXT: sunpkhi z25.d, z25.s
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z26.d, z28.d
-; CHECK-NOI8MM-NEXT: uunpklo z26.d, z4.s
-; CHECK-NOI8MM-NEXT: sunpklo z28.d, z5.s
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z27.d, z29.d
-; CHECK-NOI8MM-NEXT: uunpklo z27.d, z2.s
-; CHECK-NOI8MM-NEXT: sunpklo z29.d, z3.s
-; CHECK-NOI8MM-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NOI8MM-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NOI8MM-NEXT: sunpkhi z5.d, z5.s
-; CHECK-NOI8MM-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z6.d, z24.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z7.d, z25.d
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z26.d, z28.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z27.d, z29.d
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z4.d, z5.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z2.d, z3.d
-; CHECK-NOI8MM-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE-LABEL: usdot_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: usdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-LABEL: usdot_8to64:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uunpkhi z4.h, z2.b
+; CHECK-SVE2-NEXT: uunpklo z2.h, z2.b
+; CHECK-SVE2-NEXT: sunpkhi z5.h, z3.b
+; CHECK-SVE2-NEXT: sunpklo z3.h, z3.b
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: uunpklo z6.s, z4.h
+; CHECK-SVE2-NEXT: uunpklo z7.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z24.s, z5.h
+; CHECK-SVE2-NEXT: sunpklo z25.s, z3.h
+; CHECK-SVE2-NEXT: uunpkhi z4.s, z4.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpkhi z5.s, z5.h
+; CHECK-SVE2-NEXT: sunpkhi z3.s, z3.h
+; CHECK-SVE2-NEXT: uunpklo z26.d, z6.s
+; CHECK-SVE2-NEXT: uunpklo z27.d, z7.s
+; CHECK-SVE2-NEXT: sunpklo z28.d, z24.s
+; CHECK-SVE2-NEXT: sunpklo z29.d, z25.s
+; CHECK-SVE2-NEXT: uunpkhi z6.d, z6.s
+; CHECK-SVE2-NEXT: uunpkhi z7.d, z7.s
+; CHECK-SVE2-NEXT: sunpkhi z24.d, z24.s
+; CHECK-SVE2-NEXT: sunpkhi z25.d, z25.s
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z26.d, z28.d
+; CHECK-SVE2-NEXT: uunpklo z26.d, z4.s
+; CHECK-SVE2-NEXT: sunpklo z28.d, z5.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z27.d, z29.d
+; CHECK-SVE2-NEXT: uunpklo z27.d, z2.s
+; CHECK-SVE2-NEXT: sunpklo z29.d, z3.s
+; CHECK-SVE2-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: sunpkhi z5.d, z5.s
+; CHECK-SVE2-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z6.d, z24.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z7.d, z25.d
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z26.d, z28.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z27.d, z29.d
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z4.d, z5.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z2.d, z3.d
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: usdot_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: usdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: usdot_8to64:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: usdot z4.s, z2.b, z3.b
+; CHECK-SVE2-I8MM-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SME-LABEL: usdot_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: usdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: usdot_8to64:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: usdot z4.s, z2.b, z3.b
+; CHECK-SME-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
@@ -332,82 +306,62 @@ entry:
}
define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-I8MM-LABEL: sudot_8to64:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v4.2d, #0000000000000000
-; CHECK-I8MM-NEXT: usdot z4.s, z3.b, z2.b
-; CHECK-I8MM-NEXT: sunpklo z2.d, z4.s
-; CHECK-I8MM-NEXT: sunpkhi z3.d, z4.s
-; CHECK-I8MM-NEXT: add z0.d, z0.d, z2.d
-; CHECK-I8MM-NEXT: add z1.d, z1.d, z3.d
-; CHECK-I8MM-NEXT: ret
+; CHECK-SVE2-LABEL: sudot_8to64:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sunpkhi z4.h, z2.b
+; CHECK-SVE2-NEXT: sunpklo z2.h, z2.b
+; CHECK-SVE2-NEXT: uunpkhi z5.h, z3.b
+; CHECK-SVE2-NEXT: uunpklo z3.h, z3.b
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: sunpklo z6.s, z4.h
+; CHECK-SVE2-NEXT: sunpklo z7.s, z2.h
+; CHECK-SVE2-NEXT: uunpklo z24.s, z5.h
+; CHECK-SVE2-NEXT: uunpklo z25.s, z3.h
+; CHECK-SVE2-NEXT: sunpkhi z4.s, z4.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: uunpkhi z5.s, z5.h
+; CHECK-SVE2-NEXT: uunpkhi z3.s, z3.h
+; CHECK-SVE2-NEXT: sunpklo z26.d, z6.s
+; CHECK-SVE2-NEXT: sunpklo z27.d, z7.s
+; CHECK-SVE2-NEXT: uunpklo z28.d, z24.s
+; CHECK-SVE2-NEXT: uunpklo z29.d, z25.s
+; CHECK-SVE2-NEXT: sunpkhi z6.d, z6.s
+; CHECK-SVE2-NEXT: sunpkhi z7.d, z7.s
+; CHECK-SVE2-NEXT: uunpkhi z24.d, z24.s
+; CHECK-SVE2-NEXT: uunpkhi z25.d, z25.s
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z26.d, z28.d
+; CHECK-SVE2-NEXT: sunpklo z26.d, z4.s
+; CHECK-SVE2-NEXT: uunpklo z28.d, z5.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z27.d, z29.d
+; CHECK-SVE2-NEXT: sunpklo z27.d, z2.s
+; CHECK-SVE2-NEXT: uunpklo z29.d, z3.s
+; CHECK-SVE2-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: uunpkhi z5.d, z5.s
+; CHECK-SVE2-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z6.d, z24.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z7.d, z25.d
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z26.d, z28.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z27.d, z29.d
+; CHECK-SVE2-NEXT: mla z1.d, p0/m, z4.d, z5.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z2.d, z3.d
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NOI8MM-LABEL: sudot_8to64:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: sunpkhi z4.h, z2.b
-; CHECK-NOI8MM-NEXT: sunpklo z2.h, z2.b
-; CHECK-NOI8MM-NEXT: uunpkhi z5.h, z3.b
-; CHECK-NOI8MM-NEXT: uunpklo z3.h, z3.b
-; CHECK-NOI8MM-NEXT: ptrue p0.d
-; CHECK-NOI8MM-NEXT: sunpklo z6.s, z4.h
-; CHECK-NOI8MM-NEXT: sunpklo z7.s, z2.h
-; CHECK-NOI8MM-NEXT: uunpklo z24.s, z5.h
-; CHECK-NOI8MM-NEXT: uunpklo z25.s, z3.h
-; CHECK-NOI8MM-NEXT: sunpkhi z4.s, z4.h
-; CHECK-NOI8MM-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NOI8MM-NEXT: uunpkhi z5.s, z5.h
-; CHECK-NOI8MM-NEXT: uunpkhi z3.s, z3.h
-; CHECK-NOI8MM-NEXT: sunpklo z26.d, z6.s
-; CHECK-NOI8MM-NEXT: sunpklo z27.d, z7.s
-; CHECK-NOI8MM-NEXT: uunpklo z28.d, z24.s
-; CHECK-NOI8MM-NEXT: uunpklo z29.d, z25.s
-; CHECK-NOI8MM-NEXT: sunpkhi z6.d, z6.s
-; CHECK-NOI8MM-NEXT: sunpkhi z7.d, z7.s
-; CHECK-NOI8MM-NEXT: uunpkhi z24.d, z24.s
-; CHECK-NOI8MM-NEXT: uunpkhi z25.d, z25.s
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z26.d, z28.d
-; CHECK-NOI8MM-NEXT: sunpklo z26.d, z4.s
-; CHECK-NOI8MM-NEXT: uunpklo z28.d, z5.s
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z27.d, z29.d
-; CHECK-NOI8MM-NEXT: sunpklo z27.d, z2.s
-; CHECK-NOI8MM-NEXT: uunpklo z29.d, z3.s
-; CHECK-NOI8MM-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NOI8MM-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NOI8MM-NEXT: uunpkhi z5.d, z5.s
-; CHECK-NOI8MM-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z6.d, z24.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z7.d, z25.d
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z26.d, z28.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z27.d, z29.d
-; CHECK-NOI8MM-NEXT: mla z1.d, p0/m, z4.d, z5.d
-; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z2.d, z3.d
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sudot_8to64:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: movi v4.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: usdot z4.s, z3.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: sudot_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: usdot z4.s, z3.b, z2.b
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: sudot_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: usdot z4.s, z3.b, z2.b
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
-;
-; CHECK-NEWLOWERING-SME-LABEL: sudot_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: usdot z4.s, z3.b, z2.b
-; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: sudot_8to64:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: usdot z4.s, z3.b, z2.b
+; CHECK-SME-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
@@ -418,51 +372,69 @@ entry:
}
define <vscale x 4 x i32> @udot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a){
-; CHECK-LABEL: udot_no_bin_op:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.b, #1 // =0x1
-; CHECK-NEXT: udot z0.s, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_no_bin_op:
+; CHECK-SVE2: // %bb.0:
+; CHECK-SVE2-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SVE2-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_no_bin_op:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
-; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_no_bin_op:
+; CHECK-SVE2-I8MM: // %bb.0:
+; CHECK-SVE2-I8MM-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: udot_no_bin_op:
+; CHECK-SME: // %bb.0:
+; CHECK-SME-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SME-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
ret <vscale x 4 x i32> %partial.reduce
}
define <vscale x 4 x i32> @sdot_no_bin_op(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a){
-; CHECK-LABEL: sdot_no_bin_op:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.b, #1 // =0x1
-; CHECK-NEXT: sdot z0.s, z1.b, z2.b
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_no_bin_op:
+; CHECK-SVE2: // %bb.0:
+; CHECK-SVE2-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SVE2-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_no_bin_op:
+; CHECK-SVE2-I8MM: // %bb.0:
+; CHECK-SVE2-I8MM-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: mov z2.b, #1 // =0x1
-; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_no_bin_op:
+; CHECK-SME: // %bb.0:
+; CHECK-SME-NEXT: mov z2.b, #1 // =0x1
+; CHECK-SME-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %a.ext)
ret <vscale x 4 x i32> %partial.reduce
}
define <vscale x 2 x i64> @udot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
-; CHECK-LABEL: udot_no_bin_op_wide:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.h, #1 // =0x1
-; CHECK-NEXT: udot z0.d, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_no_bin_op_wide:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SVE2-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_wide:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
-; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_no_bin_op_wide:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: udot_no_bin_op_wide:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SME-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %a.wide)
@@ -470,17 +442,23 @@ entry:
}
define <vscale x 2 x i64> @sdot_no_bin_op_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
-; CHECK-LABEL: sdot_no_bin_op_wide:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.h, #1 // =0x1
-; CHECK-NEXT: sdot z0.d, z1.h, z2.h
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_no_bin_op_wide:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SVE2-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_no_bin_op_wide:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_wide:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: mov z2.h, #1 // =0x1
-; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_no_bin_op_wide:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z2.h, #1 // =0x1
+; CHECK-SME-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %a.wide)
@@ -488,137 +466,93 @@ entry:
}
define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a){
-; CHECK-LABEL: udot_no_bin_op_8to64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEXT: udot z3.s, z2.b, z4.b
-; CHECK-NEXT: sunpklo z2.d, z3.s
-; CHECK-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_no_bin_op_8to64:
+; CHECK-SVE2: // %bb.0:
+; CHECK-SVE2-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SVE2-NEXT: mov z4.b, #1 // =0x1
+; CHECK-SVE2-NEXT: udot z3.s, z2.b, z4.b
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z3.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z3.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: udot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0:
-; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEWLOWERING-SVE-NEXT: udot z3.s, z2.b, z4.b
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_no_bin_op_8to64:
+; CHECK-SVE2-I8MM: // %bb.0:
+; CHECK-SVE2-I8MM-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: mov z4.b, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: udot z3.s, z2.b, z4.b
+; CHECK-SVE2-I8MM-NEXT: uaddwb z0.d, z0.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uaddwt z0.d, z0.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: udot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0:
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEWLOWERING-SVE2-NEXT: udot z3.s, z2.b, z4.b
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
-;
-; CHECK-NEWLOWERING-SME-LABEL: udot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0:
-; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: udot_no_bin_op_8to64:
+; CHECK-SME: // %bb.0:
+; CHECK-SME-NEXT: mov z3.b, #1 // =0x1
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: udot z4.s, z2.b, z3.b
+; CHECK-SME-NEXT: uaddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: uaddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
%a.ext = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
ret <vscale x 4 x i64> %partial.reduce
}
define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8> %a){
-; CHECK-LABEL: sdot_no_bin_op_8to64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEXT: sdot z3.s, z2.b, z4.b
-; CHECK-NEXT: sunpklo z2.d, z3.s
-; CHECK-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE-LABEL: sdot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SVE: // %bb.0:
-; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEWLOWERING-SVE-NEXT: sdot z3.s, z2.b, z4.b
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_no_bin_op_8to64:
+; CHECK-SVE2: // %bb.0:
+; CHECK-SVE2-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SVE2-NEXT: mov z4.b, #1 // =0x1
+; CHECK-SVE2-NEXT: sdot z3.s, z2.b, z4.b
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z3.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z3.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: sdot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SVE2: // %bb.0:
-; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
-; CHECK-NEWLOWERING-SVE2-NEXT: sdot z3.s, z2.b, z4.b
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sdot_no_bin_op_8to64:
+; CHECK-SVE2-I8MM: // %bb.0:
+; CHECK-SVE2-I8MM-NEXT: movi v3.2d, #0000000000000000
+; CHECK-SVE2-I8MM-NEXT: mov z4.b, #1 // =0x1
+; CHECK-SVE2-I8MM-NEXT: sdot z3.s, z2.b, z4.b
+; CHECK-SVE2-I8MM-NEXT: saddwb z0.d, z0.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: saddwt z0.d, z0.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SME-LABEL: sdot_no_bin_op_8to64:
-; CHECK-NEWLOWERING-SME: // %bb.0:
-; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
-; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
-; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: sdot_no_bin_op_8to64:
+; CHECK-SME: // %bb.0:
+; CHECK-SME-NEXT: mov z3.b, #1 // =0x1
+; CHECK-SME-NEXT: mov z4.s, #0 // =0x0
+; CHECK-SME-NEXT: sdot z4.s, z2.b, z3.b
+; CHECK-SME-NEXT: saddwb z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: saddwt z0.d, z0.d, z4.s
+; CHECK-SME-NEXT: ret
%a.ext = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64(<vscale x 4 x i64> %acc, <vscale x 16 x i64> %a.ext)
ret <vscale x 4 x i64> %partial.reduce
}
define <vscale x 4 x i32> @not_udot(<vscale x 4 x i32> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
-; CHECK-LABEL: not_udot:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEXT: mla z0.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: not_udot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SVE2-NEXT: umlalb z0.s, z1.h, z2.h
+; CHECK-SVE2-NEXT: umlalt z0.s, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: not_udot:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEWLOWERING-SVE-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z3.s, z4.s
-; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: not_udot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: umlalb z0.s, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: umlalt z0.s, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: not_udot:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-SVE2-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.s, z1.h, z2.h
-; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.s, z1.h, z2.h
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
-;
-; CHECK-NEWLOWERING-SME-LABEL: not_udot:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-SME-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.s, z1.h, z2.h
-; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.s, z1.h, z2.h
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: not_udot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SME-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SME-NEXT: umlalb z0.s, z1.h, z2.h
+; CHECK-SME-NEXT: umlalt z0.s, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -628,47 +562,29 @@ entry:
}
define <vscale x 2 x i64> @not_udot_wide(<vscale x 2 x i64> %acc, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
-; CHECK-LABEL: not_udot_wide:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: and z2.s, z2.s, #0xffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpklo z4.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE-LABEL: not_udot_wide:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEWLOWERING-SVE-NEXT: and z2.s, z2.s, #0xffff
-; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
+; CHECK-SVE2-LABEL: not_udot_wide:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z2.s, z2.s, #0xffff
+; CHECK-SVE2-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE2-NEXT: umlalb z0.d, z1.s, z2.s
+; CHECK-SVE2-NEXT: umlalt z0.d, z1.s, z2.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE2-LABEL: not_udot_wide:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: and z2.s, z2.s, #0xffff
-; CHECK-NEWLOWERING-SVE2-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.d, z1.s, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.d, z1.s, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: not_udot_wide:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z2.s, z2.s, #0xffff
+; CHECK-SVE2-I8MM-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE2-I8MM-NEXT: umlalb z0.d, z1.s, z2.s
+; CHECK-SVE2-I8MM-NEXT: umlalt z0.d, z1.s, z2.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-SME-LABEL: not_udot_wide:
-; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SME-NEXT: and z2.s, z2.s, #0xffff
-; CHECK-NEWLOWERING-SME-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.d, z1.s, z2.s
-; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.d, z1.s, z2.s
-; CHECK-NEWLOWERING-SME-NEXT: ret
+; CHECK-SME-LABEL: not_udot_wide:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z2.s, z2.s, #0xffff
+; CHECK-SME-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SME-NEXT: umlalb z0.d, z1.s, z2.s
+; CHECK-SME-NEXT: umlalt z0.d, z1.s, z2.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 4 x i16> %a to <vscale x 4 x i64>
%b.wide = zext <vscale x 4 x i16> %b to <vscale x 4 x i64>
@@ -678,47 +594,68 @@ entry:
}
define <vscale x 2 x i64> @not_usdot(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: not_usdot:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: not_usdot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: not_usdot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: not_usdot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: not_usdot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: uunpklo z3.s, z1.h
+; CHECK-SME-NEXT: sunpklo z4.s, z2.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: uunpklo z5.d, z3.s
+; CHECK-SME-NEXT: sunpklo z6.d, z4.s
+; CHECK-SME-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: uunpklo z5.d, z1.s
+; CHECK-SME-NEXT: sunpklo z6.d, z2.s
+; CHECK-SME-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = sext <vscale x 8 x i16> %b to <vscale x 8 x i64>
@@ -728,47 +665,68 @@ entry:
}
define <vscale x 2 x i64> @not_sudot(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: not_sudot:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: not_sudot:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: not_sudot:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: not_sudot:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: not_sudot:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: sunpklo z3.s, z1.h
+; CHECK-SME-NEXT: uunpklo z4.s, z2.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: sunpklo z5.d, z3.s
+; CHECK-SME-NEXT: uunpklo z6.d, z4.s
+; CHECK-SME-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: sunpklo z5.d, z1.s
+; CHECK-SME-NEXT: uunpklo z6.d, z2.s
+; CHECK-SME-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = zext <vscale x 8 x i16> %b to <vscale x 8 x i64>
@@ -778,49 +736,71 @@ entry:
}
define <vscale x 2 x i64> @udot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: udot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_different_types:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_different_types:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_different_types:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: udot_different_types:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SME-NEXT: uunpklo z3.s, z1.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: uunpklo z4.s, z2.h
+; CHECK-SME-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: uunpklo z5.d, z3.s
+; CHECK-SME-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: uunpklo z6.d, z4.s
+; CHECK-SME-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: uunpklo z5.d, z1.s
+; CHECK-SME-NEXT: uunpklo z6.d, z2.s
+; CHECK-SME-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i64>
@@ -830,51 +810,74 @@ entry:
}
define <vscale x 2 x i64> @sdot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: sdot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_different_types:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: ptrue p0.h
+; CHECK-SVE2-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_different_types:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_different_types:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: ptrue p0.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_different_types:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: ptrue p0.h
+; CHECK-SME-NEXT: sunpklo z3.s, z1.h
+; CHECK-SME-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: sunpklo z5.d, z3.s
+; CHECK-SME-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: sunpklo z4.s, z2.h
+; CHECK-SME-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: sunpklo z6.d, z4.s
+; CHECK-SME-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: sunpklo z5.d, z1.s
+; CHECK-SME-NEXT: sunpklo z6.d, z2.s
+; CHECK-SME-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = sext <vscale x 8 x i8> %b to <vscale x 8 x i64>
@@ -884,51 +887,74 @@ entry:
}
define <vscale x 2 x i64> @usdot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: usdot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: usdot_different_types:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: ptrue p0.h
+; CHECK-SVE2-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: usdot_different_types:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: ptrue p0.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: uunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: usdot_different_types:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: uunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: usdot_different_types:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: ptrue p0.h
+; CHECK-SME-NEXT: uunpklo z3.s, z1.h
+; CHECK-SME-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: uunpklo z5.d, z3.s
+; CHECK-SME-NEXT: uunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: sunpklo z4.s, z2.h
+; CHECK-SME-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: sunpklo z6.d, z4.s
+; CHECK-SME-NEXT: sunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: uunpklo z5.d, z1.s
+; CHECK-SME-NEXT: sunpklo z6.d, z2.s
+; CHECK-SME-NEXT: uunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: sunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = sext <vscale x 8 x i8> %b to <vscale x 8 x i64>
@@ -938,49 +964,71 @@ entry:
}
define <vscale x 2 x i64> @sudot_different_types(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: sudot_different_types:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sudot_different_types:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-NEXT: ptrue p0.d
+; CHECK-SVE2-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sudot_different_types:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: sunpklo z3.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.d
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z4.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: sunpklo z5.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpklo z6.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SVE2-I8MM-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sudot_different_types:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEWLOWERING-NEXT: ptrue p0.d
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: sunpklo z5.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z2.s
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z5.d, z6.d
-; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sudot_different_types:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SME-NEXT: sunpklo z3.s, z1.h
+; CHECK-SME-NEXT: ptrue p0.d
+; CHECK-SME-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: uunpklo z4.s, z2.h
+; CHECK-SME-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: sunpklo z5.d, z3.s
+; CHECK-SME-NEXT: sunpkhi z3.d, z3.s
+; CHECK-SME-NEXT: uunpklo z6.d, z4.s
+; CHECK-SME-NEXT: uunpkhi z4.d, z4.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: sunpklo z5.d, z1.s
+; CHECK-SME-NEXT: uunpklo z6.d, z2.s
+; CHECK-SME-NEXT: sunpkhi z1.d, z1.s
+; CHECK-SME-NEXT: uunpkhi z2.d, z2.s
+; CHECK-SME-NEXT: mla z0.d, p0/m, z3.d, z4.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z5.d, z6.d
+; CHECK-SME-NEXT: mla z0.d, p0/m, z1.d, z2.d
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i64>
@@ -990,29 +1038,26 @@ entry:
}
define <vscale x 2 x i16> @udot_nxv8i8_promote (<vscale x 2 x i16> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: udot_nxv8i8_promote:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEXT: mul z1.h, z1.h, z2.h
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpklo z3.d, z2.s
-; CHECK-NEXT: uunpklo z4.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEXT: add z2.d, z2.d, z4.d
-; CHECK-NEXT: add z0.d, z1.d, z0.d
-; CHECK-NEXT: add z0.d, z2.d, z0.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_nxv8i8_promote:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SVE2-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_nxv8i8_promote:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEWLOWERING-NEXT: udot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: udot_nxv8i8_promote:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: udot_nxv8i8_promote:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SME-NEXT: and z1.h, z1.h, #0xff
+; CHECK-SME-NEXT: udot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -1022,31 +1067,29 @@ entry:
}
define <vscale x 2 x i16> @sdot_nxv8i8_promote (<vscale x 2 x i16> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b){
-; CHECK-LABEL: sdot_nxv8i8_promote:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEXT: mul z1.h, z1.h, z2.h
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: uunpklo z3.d, z2.s
-; CHECK-NEXT: uunpklo z4.d, z1.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: add z0.d, z0.d, z3.d
-; CHECK-NEXT: add z2.d, z2.d, z4.d
-; CHECK-NEXT: add z0.d, z1.d, z0.d
-; CHECK-NEXT: add z0.d, z2.d, z0.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_nxv8i8_promote:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: ptrue p0.h
+; CHECK-SVE2-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-SVE2-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_nxv8i8_promote:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: ptrue p0.h
+; CHECK-SVE2-I8MM-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SVE2-I8MM-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-SVE2-I8MM-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_nxv8i8_promote:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: ptrue p0.h
-; CHECK-NEWLOWERING-NEXT: sxtb z2.h, p0/m, z2.h
-; CHECK-NEWLOWERING-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEWLOWERING-NEXT: sdot z0.d, z1.h, z2.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_nxv8i8_promote:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: ptrue p0.h
+; CHECK-SME-NEXT: sxtb z2.h, p0/m, z2.h
+; CHECK-SME-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-SME-NEXT: sdot z0.d, z1.h, z2.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b.wide = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -1056,35 +1099,26 @@ entry:
}
define <vscale x 4 x i64> @partial_reduce_only_split_acc(<vscale x 4 x i64> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
-; CHECK-LABEL: partial_reduce_only_split_acc:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEXT: and z3.h, z3.h, #0xff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpkhi z4.s, z2.h
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpkhi z5.s, z3.h
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: uunpklo z6.d, z4.s
-; CHECK-NEXT: uunpklo z7.d, z2.s
-; CHECK-NEXT: uunpklo z24.d, z5.s
-; CHECK-NEXT: uunpklo z25.d, z3.s
-; CHECK-NEXT: uunpkhi z4.d, z4.s
-; CHECK-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEXT: uunpkhi z5.d, z5.s
-; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: mla z1.d, p0/m, z6.d, z24.d
-; CHECK-NEXT: mla z0.d, p0/m, z7.d, z25.d
-; CHECK-NEXT: mla z1.d, p0/m, z4.d, z5.d
-; CHECK-NEXT: mla z0.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: partial_reduce_only_split_acc:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z3.h, z3.h, #0xff
+; CHECK-SVE2-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-NEXT: udot z0.d, z2.h, z3.h
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: partial_reduce_only_split_acc:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: and z3.h, z3.h, #0xff
-; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
-; CHECK-NEWLOWERING-NEXT: udot z0.d, z2.h, z3.h
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: partial_reduce_only_split_acc:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: and z3.h, z3.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SVE2-I8MM-NEXT: udot z0.d, z2.h, z3.h
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: partial_reduce_only_split_acc:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: and z3.h, z3.h, #0xff
+; CHECK-SME-NEXT: and z2.h, z2.h, #0xff
+; CHECK-SME-NEXT: udot z0.d, z2.h, z3.h
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 8 x i8> %a to <vscale x 8 x i64>
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i64>
@@ -1095,25 +1129,23 @@ entry:
}
define <vscale x 4 x i32> @sdot_imm(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: sdot_imm:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NEXT: sunpklo z3.s, z2.h
-; CHECK-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEXT: sub z0.s, z0.s, z3.s
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
-; CHECK-NEXT: sub z0.s, z0.s, z3.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_imm:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SVE2-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: sdot_imm:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SVE2-I8MM-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_imm:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
-; CHECK-NEWLOWERING-NEXT: sdot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: sdot_imm:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SME-NEXT: sdot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, splat(i32 -1)
@@ -1122,41 +1154,59 @@ entry:
}
define <vscale x 4 x i32> @sdot_imm_does_not_fit(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: sdot_imm_does_not_fit:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NEXT: sunpklo z3.s, z2.h
-; CHECK-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEXT: sunpklo z4.s, z1.h
-; CHECK-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEXT: lsl z4.s, z4.s, #8
-; CHECK-NEXT: lsl z2.s, z2.s, #8
-; CHECK-NEXT: lsl z3.s, z3.s, #8
-; CHECK-NEXT: lsl z1.s, z1.s, #8
-; CHECK-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEXT: add z2.s, z2.s, z4.s
-; CHECK-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: sdot_imm_does_not_fit:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: sunpklo z2.h, z1.b
+; CHECK-SVE2-NEXT: sunpkhi z1.h, z1.b
+; CHECK-SVE2-NEXT: sunpklo z3.s, z2.h
+; CHECK-SVE2-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: sunpklo z4.s, z1.h
+; CHECK-SVE2-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SVE2-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SVE2-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SVE2-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SVE2-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SVE2-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: sdot_imm_does_not_fit:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: sunpklo z4.s, z1.h
-; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: lsl z4.s, z4.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z2.s, z2.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z3.s, z3.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z1.s, z1.s, #8
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEWLOWERING-NEXT: add z2.s, z2.s, z4.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SVE2-I8MM-LABEL: sdot_imm_does_not_fit:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: sunpklo z2.h, z1.b
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.h, z1.b
+; CHECK-SVE2-I8MM-NEXT: sunpklo z3.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: sunpklo z4.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SVE2-I8MM-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SVE2-I8MM-NEXT: ret
+;
+; CHECK-SME-LABEL: sdot_imm_does_not_fit:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: sunpklo z2.h, z1.b
+; CHECK-SME-NEXT: sunpkhi z1.h, z1.b
+; CHECK-SME-NEXT: sunpklo z3.s, z2.h
+; CHECK-SME-NEXT: sunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: sunpklo z4.s, z1.h
+; CHECK-SME-NEXT: sunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SME-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SME-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SME-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SME-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SME-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SME-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SME-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, splat(i32 256)
@@ -1165,27 +1215,23 @@ entry:
}
define <vscale x 4 x i32> @udot_imm(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: udot_imm:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpklo z3.h, z1.b
-; CHECK-NEXT: mov z2.s, #255 // =0xff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpklo z4.s, z3.h
-; CHECK-NEXT: uunpkhi z3.s, z3.h
-; CHECK-NEXT: mla z0.s, p0/m, z4.s, z2.s
-; CHECK-NEXT: uunpklo z4.s, z1.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: mla z0.s, p0/m, z3.s, z2.s
-; CHECK-NEXT: mla z0.s, p0/m, z4.s, z2.s
-; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_imm:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SVE2-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: udot_imm:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SVE2-I8MM-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_imm:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
-; CHECK-NEWLOWERING-NEXT: udot z0.s, z1.b, z2.b
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: udot_imm:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
+; CHECK-SME-NEXT: udot z0.s, z1.b, z2.b
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, splat(i32 255)
@@ -1194,41 +1240,59 @@ entry:
}
define <vscale x 4 x i32> @udot_imm_does_not_fit(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a) {
-; CHECK-LABEL: udot_imm_does_not_fit:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEXT: uunpklo z3.s, z2.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEXT: uunpklo z4.s, z1.h
-; CHECK-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEXT: lsl z4.s, z4.s, #8
-; CHECK-NEXT: lsl z2.s, z2.s, #8
-; CHECK-NEXT: lsl z3.s, z3.s, #8
-; CHECK-NEXT: lsl z1.s, z1.s, #8
-; CHECK-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEXT: add z2.s, z2.s, z4.s
-; CHECK-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK-SVE2-LABEL: udot_imm_does_not_fit:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uunpklo z2.h, z1.b
+; CHECK-SVE2-NEXT: uunpkhi z1.h, z1.b
+; CHECK-SVE2-NEXT: uunpklo z3.s, z2.h
+; CHECK-SVE2-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-NEXT: uunpklo z4.s, z1.h
+; CHECK-SVE2-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SVE2-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SVE2-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SVE2-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SVE2-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE2-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SVE2-NEXT: ret
+;
+; CHECK-SVE2-I8MM-LABEL: udot_imm_does_not_fit:
+; CHECK-SVE2-I8MM: // %bb.0: // %entry
+; CHECK-SVE2-I8MM-NEXT: uunpklo z2.h, z1.b
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.h, z1.b
+; CHECK-SVE2-I8MM-NEXT: uunpklo z3.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SVE2-I8MM-NEXT: uunpklo z4.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SVE2-I8MM-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SVE2-I8MM-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SVE2-I8MM-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SVE2-I8MM-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SVE2-I8MM-NEXT: ret
;
-; CHECK-NEWLOWERING-LABEL: udot_imm_does_not_fit:
-; CHECK-NEWLOWERING: // %bb.0: // %entry
-; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
-; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z1.h
-; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-NEXT: lsl z4.s, z4.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z2.s, z2.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z3.s, z3.s, #8
-; CHECK-NEWLOWERING-NEXT: lsl z1.s, z1.s, #8
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z3.s
-; CHECK-NEWLOWERING-NEXT: add z2.s, z2.s, z4.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEWLOWERING-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-SME-LABEL: udot_imm_does_not_fit:
+; CHECK-SME: // %bb.0: // %entry
+; CHECK-SME-NEXT: uunpklo z2.h, z1.b
+; CHECK-SME-NEXT: uunpkhi z1.h, z1.b
+; CHECK-SME-NEXT: uunpklo z3.s, z2.h
+; CHECK-SME-NEXT: uunpkhi z2.s, z2.h
+; CHECK-SME-NEXT: uunpklo z4.s, z1.h
+; CHECK-SME-NEXT: uunpkhi z1.s, z1.h
+; CHECK-SME-NEXT: lsl z4.s, z4.s, #8
+; CHECK-SME-NEXT: lsl z2.s, z2.s, #8
+; CHECK-SME-NEXT: lsl z3.s, z3.s, #8
+; CHECK-SME-NEXT: lsl z1.s, z1.s, #8
+; CHECK-SME-NEXT: add z0.s, z0.s, z3.s
+; CHECK-SME-NEXT: add z2.s, z2.s, z4.s
+; CHECK-SME-NEXT: add z0.s, z0.s, z2.s
+; CHECK-SME-NEXT: add z0.s, z0.s, z1.s
+; CHECK-SME-NEXT: ret
entry:
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, splat(i32 256)
diff --git a/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll b/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll
index 428dd4c3a0154..e62979d077fd2 100644
--- a/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll
+++ b/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll
@@ -1,16 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK-SVE2
-; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s --check-prefixes=CHECK-SVE
-; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING-SVE
-; RUN: llc -mtriple=aarch64 -mattr=+sve2 -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING-SVE2
+; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s --check-prefix=CHECK-SVE
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefix=CHECK-SVE2
define <vscale x 2 x i64> @signed_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <vscale x 4 x i32> %input){
-; CHECK-SVE2-LABEL: signed_wide_add_nxv4i32:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
-; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: signed_wide_add_nxv4i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: sunpklo z2.d, z1.s
@@ -19,19 +11,11 @@ define <vscale x 2 x i64> @signed_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <vsc
; CHECK-SVE-NEXT: add z0.d, z0.d, z1.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: signed_wide_add_nxv4i32:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: signed_wide_add_nxv4i32:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv4i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = sext <vscale x 4 x i32> %input to <vscale x 4 x i64>
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %input.wide)
@@ -39,12 +23,6 @@ entry:
}
define <vscale x 2 x i64> @unsigned_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <vscale x 4 x i32> %input){
-; CHECK-SVE2-LABEL: unsigned_wide_add_nxv4i32:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
-; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: unsigned_wide_add_nxv4i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: uunpklo z2.d, z1.s
@@ -53,19 +31,11 @@ define <vscale x 2 x i64> @unsigned_wide_add_nxv4i32(<vscale x 2 x i64> %acc, <v
; CHECK-SVE-NEXT: add z0.d, z0.d, z1.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: unsigned_wide_add_nxv4i32:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: unsigned_wide_add_nxv4i32:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv4i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = zext <vscale x 4 x i32> %input to <vscale x 4 x i64>
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %input.wide)
@@ -73,12 +43,6 @@ entry:
}
define <vscale x 4 x i32> @signed_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <vscale x 8 x i16> %input){
-; CHECK-SVE2-LABEL: signed_wide_add_nxv8i16:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: saddwb z0.s, z0.s, z1.h
-; CHECK-SVE2-NEXT: saddwt z0.s, z0.s, z1.h
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: signed_wide_add_nxv8i16:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: sunpklo z2.s, z1.h
@@ -87,19 +51,11 @@ define <vscale x 4 x i32> @signed_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <vsc
; CHECK-SVE-NEXT: add z0.s, z0.s, z1.s
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: signed_wide_add_nxv8i16:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: signed_wide_add_nxv8i16:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.s, z0.s, z1.h
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.s, z0.s, z1.h
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv8i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: saddwt z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = sext <vscale x 8 x i16> %input to <vscale x 8 x i32>
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %input.wide)
@@ -107,12 +63,6 @@ entry:
}
define <vscale x 4 x i32> @unsigned_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <vscale x 8 x i16> %input){
-; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i16:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: uaddwb z0.s, z0.s, z1.h
-; CHECK-SVE2-NEXT: uaddwt z0.s, z0.s, z1.h
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: unsigned_wide_add_nxv8i16:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: uunpklo z2.s, z1.h
@@ -121,19 +71,11 @@ define <vscale x 4 x i32> @unsigned_wide_add_nxv8i16(<vscale x 4 x i32> %acc, <v
; CHECK-SVE-NEXT: add z0.s, z0.s, z1.s
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: unsigned_wide_add_nxv8i16:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.s, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: unsigned_wide_add_nxv8i16:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.s, z0.s, z1.h
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.s, z0.s, z1.h
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: uaddwt z0.s, z0.s, z1.h
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = zext <vscale x 8 x i16> %input to <vscale x 8 x i32>
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %input.wide)
@@ -141,12 +83,6 @@ entry:
}
define <vscale x 8 x i16> @signed_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <vscale x 16 x i8> %input){
-; CHECK-SVE2-LABEL: signed_wide_add_nxv16i8:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: saddwb z0.h, z0.h, z1.b
-; CHECK-SVE2-NEXT: saddwt z0.h, z0.h, z1.b
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: signed_wide_add_nxv16i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: sunpklo z2.h, z1.b
@@ -155,19 +91,11 @@ define <vscale x 8 x i16> @signed_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <vsc
; CHECK-SVE-NEXT: add z0.h, z0.h, z1.h
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: signed_wide_add_nxv16i8:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.h, z0.h, z2.h
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.h, z0.h, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: signed_wide_add_nxv16i8:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.h, z0.h, z1.b
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.h, z0.h, z1.b
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv16i8:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: saddwt z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = sext <vscale x 16 x i8> %input to <vscale x 16 x i16>
%partial.reduce = tail call <vscale x 8 x i16> @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16(<vscale x 8 x i16> %acc, <vscale x 16 x i16> %input.wide)
@@ -175,12 +103,6 @@ entry:
}
define <vscale x 8 x i16> @unsigned_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <vscale x 16 x i8> %input){
-; CHECK-SVE2-LABEL: unsigned_wide_add_nxv16i8:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: uaddwb z0.h, z0.h, z1.b
-; CHECK-SVE2-NEXT: uaddwt z0.h, z0.h, z1.b
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: unsigned_wide_add_nxv16i8:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: uunpklo z2.h, z1.b
@@ -189,19 +111,11 @@ define <vscale x 8 x i16> @unsigned_wide_add_nxv16i8(<vscale x 8 x i16> %acc, <v
; CHECK-SVE-NEXT: add z0.h, z0.h, z1.h
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: unsigned_wide_add_nxv16i8:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.h, z1.b
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.h, z0.h, z2.h
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.h, z0.h, z1.h
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: unsigned_wide_add_nxv16i8:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.h, z0.h, z1.b
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.h, z0.h, z1.b
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv16i8:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: uaddwt z0.h, z0.h, z1.b
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = zext <vscale x 16 x i8> %input to <vscale x 16 x i16>
%partial.reduce = tail call <vscale x 8 x i16> @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16(<vscale x 8 x i16> %acc, <vscale x 16 x i16> %input.wide)
@@ -209,16 +123,6 @@ entry:
}
define <vscale x 2 x i32> @signed_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
-; CHECK-SVE2-LABEL: signed_wide_add_nxv4i16:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: ptrue p0.s
-; CHECK-SVE2-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-SVE2-NEXT: uunpklo z2.d, z1.s
-; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
-; CHECK-SVE2-NEXT: add z0.d, z1.d, z0.d
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: signed_wide_add_nxv4i16:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: ptrue p0.s
@@ -229,23 +133,13 @@ define <vscale x 2 x i32> @signed_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vsc
; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: signed_wide_add_nxv4i16:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.s
-; CHECK-NEWLOWERING-SVE-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z1.d, z0.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: signed_wide_add_nxv4i16:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: ptrue p0.s
-; CHECK-NEWLOWERING-SVE2-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv4i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: ptrue p0.s
+; CHECK-SVE2-NEXT: sxth z1.s, p0/m, z1.s
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = sext <vscale x 4 x i16> %input to <vscale x 4 x i32>
%partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
@@ -253,15 +147,6 @@ entry:
}
define <vscale x 2 x i32> @unsigned_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <vscale x 4 x i16> %input){
-; CHECK-SVE2-LABEL: unsigned_wide_add_nxv4i16:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-SVE2-NEXT: uunpklo z2.d, z1.s
-; CHECK-SVE2-NEXT: uunpkhi z1.d, z1.s
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
-; CHECK-SVE2-NEXT: add z0.d, z1.d, z0.d
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: unsigned_wide_add_nxv4i16:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: and z1.s, z1.s, #0xffff
@@ -271,21 +156,12 @@ define <vscale x 2 x i32> @unsigned_wide_add_nxv4i16(<vscale x 2 x i32> %acc, <v
; CHECK-SVE-NEXT: add z0.d, z1.d, z0.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: unsigned_wide_add_nxv4i16:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z1.d, z0.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: unsigned_wide_add_nxv4i16:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv4i16:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z1.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = zext <vscale x 4 x i16> %input to <vscale x 4 x i32>
%partial.reduce = tail call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32(<vscale x 2 x i32> %acc, <vscale x 4 x i32> %input.wide)
@@ -293,18 +169,6 @@ entry:
}
define <vscale x 4 x i64> @signed_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
-; CHECK-SVE2-LABEL: signed_wide_add_nxv8i32:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: sunpklo z4.d, z3.s
-; CHECK-SVE2-NEXT: sunpklo z5.d, z2.s
-; CHECK-SVE2-NEXT: sunpkhi z3.d, z3.s
-; CHECK-SVE2-NEXT: sunpkhi z2.d, z2.s
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z5.d
-; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
-; CHECK-SVE2-NEXT: add z1.d, z1.d, z3.d
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: signed_wide_add_nxv8i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: sunpklo z4.d, z3.s
@@ -317,25 +181,13 @@ define <vscale x 4 x i64> @signed_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vsc
; CHECK-SVE-NEXT: add z1.d, z1.d, z3.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: signed_wide_add_nxv8i32:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z4.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z5.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z5.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z1.d, z1.d, z4.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: signed_wide_add_nxv8i32:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z1.d, z1.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z1.d, z1.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: signed_wide_add_nxv8i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: saddwb z1.d, z1.d, z3.s
+; CHECK-SVE2-NEXT: saddwb z0.d, z0.d, z2.s
+; CHECK-SVE2-NEXT: saddwt z1.d, z1.d, z3.s
+; CHECK-SVE2-NEXT: saddwt z0.d, z0.d, z2.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = sext <vscale x 8 x i32> %input to <vscale x 8 x i64>
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
@@ -343,18 +195,6 @@ entry:
}
define <vscale x 4 x i64> @unsigned_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <vscale x 8 x i32> %input){
-; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i32:
-; CHECK-SVE2: // %bb.0: // %entry
-; CHECK-SVE2-NEXT: uunpklo z4.d, z3.s
-; CHECK-SVE2-NEXT: uunpklo z5.d, z2.s
-; CHECK-SVE2-NEXT: uunpkhi z3.d, z3.s
-; CHECK-SVE2-NEXT: uunpkhi z2.d, z2.s
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z5.d
-; CHECK-SVE2-NEXT: add z1.d, z1.d, z4.d
-; CHECK-SVE2-NEXT: add z0.d, z0.d, z2.d
-; CHECK-SVE2-NEXT: add z1.d, z1.d, z3.d
-; CHECK-SVE2-NEXT: ret
-;
; CHECK-SVE-LABEL: unsigned_wide_add_nxv8i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: uunpklo z4.d, z3.s
@@ -367,25 +207,13 @@ define <vscale x 4 x i64> @unsigned_wide_add_nxv8i32(<vscale x 4 x i64> %acc, <v
; CHECK-SVE-NEXT: add z1.d, z1.d, z3.d
; CHECK-SVE-NEXT: ret
;
-; CHECK-NEWLOWERING-SVE-LABEL: unsigned_wide_add_nxv8i32:
-; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z5.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z2.s
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z5.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z1.d, z1.d, z4.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEWLOWERING-SVE-NEXT: add z1.d, z1.d, z3.d
-; CHECK-NEWLOWERING-SVE-NEXT: ret
-;
-; CHECK-NEWLOWERING-SVE2-LABEL: unsigned_wide_add_nxv8i32:
-; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z1.d, z1.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z1.d, z1.d, z3.s
-; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z2.s
-; CHECK-NEWLOWERING-SVE2-NEXT: ret
+; CHECK-SVE2-LABEL: unsigned_wide_add_nxv8i32:
+; CHECK-SVE2: // %bb.0: // %entry
+; CHECK-SVE2-NEXT: uaddwb z1.d, z1.d, z3.s
+; CHECK-SVE2-NEXT: uaddwb z0.d, z0.d, z2.s
+; CHECK-SVE2-NEXT: uaddwt z1.d, z1.d, z3.s
+; CHECK-SVE2-NEXT: uaddwt z0.d, z0.d, z2.s
+; CHECK-SVE2-NEXT: ret
entry:
%input.wide = zext <vscale x 8 x i32> %input to <vscale x 8 x i64>
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64(<vscale x 4 x i64> %acc, <vscale x 8 x i64> %input.wide)
More information about the llvm-commits
mailing list