[llvm] [LLVM] treat `@llvm.ssub.sat` the same as `@llvm.aarch64.neon.sqsub` (PR #140454)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 11:29:15 PDT 2025
https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/140454
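For context: with this series the generic saturating-arithmetic intrinsics and the AArch64 NEON saturating intrinsics are routed through the same ISD/GISel nodes, so patterns such as the sqdmlal/sqdmlsl accumulate forms apply to both (see the new arm64-vmul.ll tests below, which build them from @llvm.sadd.sat / @llvm.ssub.sat). A minimal LLVM IR sketch, illustrative only and not part of the patch; both functions are expected to select to the same NEON sqsub instruction:

; sqsub v0.4s, v0.4s, v1.4s
define <4 x i32> @ssub_sat_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %r = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %r
}

; sqsub v0.4s, v0.4s, v1.4s
define <4 x i32> @neon_sqsub_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %r = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)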
From 857c65f5b437fd05d21fca1f808da82fb2919691 Mon Sep 17 00:00:00 2001
From: Spencer Abson <spencer.abson at arm.com>
Date: Sat, 29 Mar 2025 22:07:47 +0000
Subject: [PATCH 1/6] [AArch64] Lower saturating add/sub intrinsics to generic
ISD nodes
---
.../Target/AArch64/AArch64ISelLowering.cpp | 20 ++
.../lib/Target/AArch64/AArch64InstrFormats.td | 49 ++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 24 +--
.../test/CodeGen/AArch64/arm64-neon-2velem.ll | 144 +++++---------
.../test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 12 +-
llvm/test/CodeGen/AArch64/arm64-vmul.ll | 184 ++++++++++++++++++
6 files changed, 282 insertions(+), 151 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 293292d47dd48..0905901c5f69b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6235,6 +6235,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(
AArch64ISD::URSHR_I, dl, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2)));
return SDValue();
+ case Intrinsic::aarch64_neon_sqadd:
+ if (Op.getValueType().isVector())
+ return DAG.getNode(ISD::SADDSAT, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ return SDValue();
+ case Intrinsic::aarch64_neon_sqsub:
+ if (Op.getValueType().isVector())
+ return DAG.getNode(ISD::SSUBSAT, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ return SDValue();
+ case Intrinsic::aarch64_neon_uqadd:
+ if (Op.getValueType().isVector())
+ return DAG.getNode(ISD::UADDSAT, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ return SDValue();
+ case Intrinsic::aarch64_neon_uqsub:
+ if (Op.getValueType().isVector())
+ return DAG.getNode(ISD::USUBSAT, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ return SDValue();
case Intrinsic::aarch64_sve_whilelt:
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
/*IsEqual=*/false);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 5489541fcb318..6adf84879052f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6256,24 +6256,6 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}
-multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
- def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
- def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
- def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
-
- def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
- def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
- def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
- def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
-}
-
// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -9861,14 +9843,15 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
}
multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
- SDPatternOperator Accum> {
+ SDPatternOperator VecAcc,
+ SDPatternOperator ScalAcc> {
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
V128, V64,
V128_lo, VectorIndexH,
asm, ".4s", ".4s", ".4h", ".h",
[(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqdmull
+ (VecAcc (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
(dup_v8i16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx)))))]> {
@@ -9883,8 +9866,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqdmull
+ (VecAcc (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
(extract_high_v8i16 (v8i16 V128:$Rn)),
(extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
bits<3> idx;
@@ -9898,8 +9881,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm, ".2d", ".2d", ".2s", ".s",
[(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_aarch64_neon_sqdmull
+ (VecAcc (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
(v2i32 V64:$Rn),
(dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
@@ -9912,8 +9895,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_aarch64_neon_sqdmull
+ (VecAcc (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
(extract_high_v4i32 (v4i32 V128:$Rn)),
(extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
@@ -9930,8 +9913,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
let Inst{20} = idx{0};
}
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
- (i32 (vector_extract
+ def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
(v4i16 V64:$Rm))),
@@ -9942,8 +9925,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
(i64 0))>;
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
- (i32 (vector_extract
+ def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
(dup_v8i16 (v8i16 V128_lo:$Rm),
@@ -9959,8 +9942,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
FPR64Op, FPR32Op, V128, VectorIndexS,
asm, ".s", "", "", ".s",
[(set (i64 FPR64Op:$dst),
- (Accum (i64 FPR64Op:$Rd),
- (i64 (int_aarch64_neon_sqdmulls_scalar
+ (ScalAcc (i64 FPR64Op:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar
(i32 FPR32Op:$Rn),
(i32 (vector_extract (v4i32 V128:$Rm),
VectorIndexS:$idx))))))]> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 010c7c391527f..9b256b2a7a878 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5811,12 +5811,12 @@ defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
-defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
+defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", saddsat>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
-defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
+defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", ssubsat>;
defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
@@ -5830,10 +5830,10 @@ defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
-defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
+defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", uaddsat>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
-defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
+defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", usubsat>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
@@ -5842,12 +5842,6 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
-// Extra saturate patterns, other than the intrinsics matches above
-defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
-defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
-defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
-defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
-
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -6563,10 +6557,8 @@ defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
-defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
- int_aarch64_neon_sqadd>;
-defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
- int_aarch64_neon_sqsub>;
+defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
+defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
int_aarch64_neon_sqdmull>;
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
@@ -8125,9 +8117,9 @@ defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
-defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
+defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
int_aarch64_neon_sqadd>;
-defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
+defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
int_aarch64_neon_sqrdmlah>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index c3ad3b4192cf9..577b80a1f52e1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -16,6 +16,14 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_lane_f32_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_laneq_f32_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_laneq_f64_0
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_lane_s16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_lane_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_high_lane_s16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_high_lane_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_lane_s16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_lane_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_high_lane_s16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_high_lane_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_lane_s16_intrinsic
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_lo
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_hi
@@ -1682,18 +1690,11 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1703,18 +1704,11 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1750,18 +1744,11 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1771,18 +1758,11 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -4036,18 +4016,11 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -4057,18 +4030,11 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -4104,18 +4070,11 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -4125,18 +4084,11 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32_0:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32_0:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 9fb8e4c8fe031..bd28d13973f9c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2539,16 +2539,16 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000
-; CHECK-NEXT: fmov d5, x0
; CHECK-NEXT: rev32 v4.8h, v0.8h
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: sqneg v3.8h, v2.8h
; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b
-; CHECK-NEXT: sqdmull v2.4s, v0.4h, v5.h[0]
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v5.h[0]
-; CHECK-NEXT: sqdmlal v2.4s, v4.4h, v1.4h
-; CHECK-NEXT: sqdmlal2 v0.4s, v4.8h, v1.8h
-; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: fmov d3, x0
+; CHECK-NEXT: sqdmull v2.4s, v4.4h, v1.4h
+; CHECK-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
+; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
+; CHECK-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
+; CHECK-NEXT: uzp2 v0.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
entry:
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 499786470d4ac..937a17ca6c1e0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -2863,3 +2863,187 @@ define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
%prod = mul <1 x i64> %lhs, %rhs
ret <1 x i64> %prod
}
+
+define <4 x i32> @sqdmlal4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) {
+; CHECK-LABEL: sqdmlal4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal.4s v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %v2)
+ %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlal2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) {
+; CHECK-LABEL: sqdmlal2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal.2d v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %v2)
+ %sum = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlal2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: sqdmlal2_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal2.4s v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1)
+ %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp2)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlal2_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: sqdmlal2_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal2.2d v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1)
+ %sum = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp2)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlal_lane_4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) {
+; CHECK-LABEL: sqdmlal_lane_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal.4s v0, v1, v2[3]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i16> %v2, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %tmp1 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %tmp0)
+ %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp1)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlal_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) {
+; CHECK-LABEL: sqdmlal_lane_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlal.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <2 x i32> %v2, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp1 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %tmp0)
+ %sum = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp1)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlal2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: sqdmlal2_lane_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal2.4s v0, v1, v2[7]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1)
+ %sum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp2)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlal2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: sqdmlal2_lane_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlal2.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1)
+ %sum = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp2)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlsl4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) {
+; CHECK-LABEL: sqdmlsl4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl.4s v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %v2)
+ %sum = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlsl2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) {
+; CHECK-LABEL: sqdmlsl2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl.2d v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %v2)
+ %sum = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlsl2_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: sqdmlsl2_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl2.4s v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1)
+ %sum = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp2)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlsl2_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: sqdmlsl2_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl2.2d v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1)
+ %sum = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp2)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlsl_lane_4s_lib(<4 x i32> %dst, <4 x i16> %v1, <4 x i16> %v2) {
+; CHECK-LABEL: sqdmlsl_lane_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl.4s v0, v1, v2[3]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i16> %v2, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %tmp1 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %v1, <4 x i16> %tmp0)
+ %sum = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp1)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlsl_lane_2d_lib(<2 x i64> %dst, <2 x i32> %v1, <2 x i32> %v2) {
+; CHECK-LABEL: sqdmlsl_lane_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: sqdmlsl.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <2 x i32> %v2, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp1 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %v1, <2 x i32> %tmp0)
+ %sum = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp1)
+ ret <2 x i64> %sum
+}
+
+define <4 x i32> @sqdmlsl2_lane_4s_lib(<4 x i32> %dst, <8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: sqdmlsl2_lane_4s_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl2.4s v0, v1, v2[7]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <8 x i16> %v1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp1 = shufflevector <8 x i16> %v2, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %tmp2 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp0, <4 x i16> %tmp1)
+ %sum = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %dst, <4 x i32> %tmp2)
+ ret <4 x i32> %sum
+}
+
+define <2 x i64> @sqdmlsl2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: sqdmlsl2_lane_2d_lib:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmlsl2.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+ %tmp0 = shufflevector <4 x i32> %v1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %tmp1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <2 x i32> <i32 1, i32 1>
+ %tmp2 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp0, <2 x i32> %tmp1)
+ %sum = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %dst, <2 x i64> %tmp2)
+ ret <2 x i64> %sum
+}
From ef9b896c43356e420c47da23f98671026f890be8 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 18 May 2025 15:28:33 +0200
Subject: [PATCH 2/6] lower saturating add/sub in aarch64 GISel
---
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7ff2e55e802c5..b563d6222a2cb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1795,6 +1795,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_neon_sqadd:
+ return LowerBinOp(TargetOpcode::G_SADDSAT);
+ case Intrinsic::aarch64_neon_sqsub:
+ return LowerBinOp(TargetOpcode::G_SSUBSAT);
+ case Intrinsic::aarch64_neon_uqadd:
+ return LowerBinOp(TargetOpcode::G_UADDSAT);
+ case Intrinsic::aarch64_neon_uqsub:
+ return LowerBinOp(TargetOpcode::G_USUBSAT);
case Intrinsic::vector_reverse:
// TODO: Add support for vector_reverse
From 1af461cd05b793abe9439bcd1bbff81f9808c59d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 18 May 2025 18:14:18 +0200
Subject: [PATCH 3/6] only for vectors
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 36 ++++++++++++++-----
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b563d6222a2cb..9db3e797b35cb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1795,14 +1795,34 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
- case Intrinsic::aarch64_neon_sqadd:
- return LowerBinOp(TargetOpcode::G_SADDSAT);
- case Intrinsic::aarch64_neon_sqsub:
- return LowerBinOp(TargetOpcode::G_SSUBSAT);
- case Intrinsic::aarch64_neon_uqadd:
- return LowerBinOp(TargetOpcode::G_UADDSAT);
- case Intrinsic::aarch64_neon_uqsub:
- return LowerBinOp(TargetOpcode::G_USUBSAT);
+ case Intrinsic::aarch64_neon_sqadd: {
+ MachineIRBuilder MIB(MI);
+ if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
+ return LowerBinOp(TargetOpcode::G_SADDSAT);
+
+ break;
+ }
+ case Intrinsic::aarch64_neon_sqsub: {
+ MachineIRBuilder MIB(MI);
+ if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
+ return LowerBinOp(TargetOpcode::G_SSUBSAT);
+
+ break;
+ }
+ case Intrinsic::aarch64_neon_uqadd: {
+ MachineIRBuilder MIB(MI);
+ if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
+ return LowerBinOp(TargetOpcode::G_UADDSAT);
+
+ break;
+ }
+ case Intrinsic::aarch64_neon_uqsub: {
+ MachineIRBuilder MIB(MI);
+ if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
+ return LowerBinOp(TargetOpcode::G_USUBSAT);
+
+ break;
+ }
case Intrinsic::vector_reverse:
// TODO: Add support for vector_reverse
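Note that the GlobalISel lowering above (like the SelectionDAG change in patch 1) only fires for vector types; scalar uses of these intrinsics keep their intrinsic form so the scalar sqdmlal/sqdmlsl accumulator patterns can still match them. A minimal sketch, illustrative and not taken from the patch, of a scalar call that is intentionally left untouched:

define i32 @sqadd_scalar_i32(i32 %a, i32 %b) {
  ; stays as the target intrinsic and selects the scalar NEON sqadd,
  ; rather than being rewritten to G_SADDSAT
  %r = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
  ret i32 %r
}

declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)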
From fa67645c0e837c9618383f50d48ed1ab9157f6b0 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 18 May 2025 18:34:59 +0200
Subject: [PATCH 4/6] apparently these warnings are gone again
---
llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll | 8 --------
1 file changed, 8 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 577b80a1f52e1..91b4d68872847 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -16,14 +16,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_lane_f32_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmss_laneq_f32_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vfmsd_laneq_f64_0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_lane_s16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_lane_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_high_lane_s16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlal_high_lane_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_lane_s16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_lane_s32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_high_lane_s16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmlsl_high_lane_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_lane_s16_intrinsic
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_lo
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vqdmulh_laneq_s16_intrinsic_hi
From 7f4b04d0d374df33650ccc00cb2fbad08740951e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 18 May 2025 20:19:12 +0200
Subject: [PATCH 5/6] declare `MIB` and `MRI` up top
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 30 +++++--------------
1 file changed, 7 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9db3e797b35cb..8ac5ea8b6a5a1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1630,6 +1630,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
};
+ MachineIRBuilder MIB(MI);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::vacopy: {
@@ -1642,7 +1645,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineFunction &MF = *MI.getMF();
auto Val = MF.getRegInfo().createGenericVirtualRegister(
LLT::scalar(VaListSize * 8));
- MachineIRBuilder MIB(MI);
MIB.buildLoad(Val, MI.getOperand(2),
*MF.getMachineMemOperand(MachinePointerInfo(),
MachineMemOperand::MOLoad,
@@ -1664,14 +1666,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
// Anyext the value being set to 64 bit (only the bottom 8 bits are read by
// the instruction).
- MachineIRBuilder MIB(MI);
auto &Value = MI.getOperand(3);
Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
Value.setReg(ExtValueReg);
return true;
}
case Intrinsic::aarch64_prefetch: {
- MachineIRBuilder MIB(MI);
auto &AddrVal = MI.getOperand(1);
int64_t IsWrite = MI.getOperand(2).getImm();
@@ -1694,8 +1694,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::aarch64_neon_smaxv:
case Intrinsic::aarch64_neon_uminv:
case Intrinsic::aarch64_neon_sminv: {
- MachineIRBuilder MIB(MI);
- MachineRegisterInfo &MRI = *MIB.getMRI();
bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
IntrinsicID == Intrinsic::aarch64_neon_sminv;
@@ -1720,8 +1718,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
}
case Intrinsic::aarch64_neon_uaddlp:
case Intrinsic::aarch64_neon_saddlp: {
- MachineIRBuilder MIB(MI);
-
unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
? AArch64::G_UADDLP
: AArch64::G_SADDLP;
@@ -1732,9 +1728,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
}
case Intrinsic::aarch64_neon_uaddlv:
case Intrinsic::aarch64_neon_saddlv: {
- MachineIRBuilder MIB(MI);
- MachineRegisterInfo &MRI = *MIB.getMRI();
-
unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
? AArch64::G_UADDLV
: AArch64::G_SADDLV;
@@ -1790,37 +1783,28 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(AArch64::G_UMULL);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
- MachineIRBuilder MIB(MI);
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
MI.eraseFromParent();
return true;
}
case Intrinsic::aarch64_neon_sqadd: {
- MachineIRBuilder MIB(MI);
if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
- return LowerBinOp(TargetOpcode::G_SADDSAT);
-
+ return LowerBinOp(TargetOpcode::G_SADDSAT);
break;
}
case Intrinsic::aarch64_neon_sqsub: {
- MachineIRBuilder MIB(MI);
if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
- return LowerBinOp(TargetOpcode::G_SSUBSAT);
-
+ return LowerBinOp(TargetOpcode::G_SSUBSAT);
break;
}
case Intrinsic::aarch64_neon_uqadd: {
- MachineIRBuilder MIB(MI);
if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
- return LowerBinOp(TargetOpcode::G_UADDSAT);
-
+ return LowerBinOp(TargetOpcode::G_UADDSAT);
break;
}
case Intrinsic::aarch64_neon_uqsub: {
- MachineIRBuilder MIB(MI);
if (MIB.getMRI()->getType(MI.getOperand(0).getReg()).isVector())
- return LowerBinOp(TargetOpcode::G_USUBSAT);
-
+ return LowerBinOp(TargetOpcode::G_USUBSAT);
break;
}
From 5cf3cc0ae4ee0748850d0c1a4b256fff4e6fa848 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Sun, 18 May 2025 20:28:01 +0200
Subject: [PATCH 6/6] revert changes to tests
---
.../test/CodeGen/AArch64/arm64-neon-2velem.ll | 136 ++++++++++++------
1 file changed, 96 insertions(+), 40 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 91b4d68872847..c3ad3b4192cf9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -1682,11 +1682,18 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1696,11 +1703,18 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1736,11 +1750,18 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1750,11 +1771,18 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -4008,11 +4036,18 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -4022,11 +4057,18 @@ entry:
}
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -4062,11 +4104,18 @@ entry:
}
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -4076,11 +4125,18 @@ entry:
}
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer