[llvm] [AArch64] Add bitcasts for lowering saturating add/sub and shift intrinsics. (PR #161840)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 04:18:12 PST 2025
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/161840
From a29ca01c6f0b0250fd7ea136732b653f3c61040a Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 3 Oct 2025 12:48:31 +0000
Subject: [PATCH] [AArch64][GlobalISel] Add explicit bitcasts when lowering
saturating add/sub and shift intrinsics.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 92 ++++++-
.../lib/Target/AArch64/AArch64InstrFormats.td | 11 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 43 +++-
llvm/test/CodeGen/AArch64/arm64-int-neon.ll | 225 ++++++++++++++++++
llvm/test/CodeGen/AArch64/arm64-vmul.ll | 52 ++--
llvm/test/CodeGen/AArch64/arm64-vshift.ll | 34 +--
6 files changed, 380 insertions(+), 77 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-int-neon.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9078675da0e95..721aea2a4c8d3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4488,6 +4488,25 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues({Sum, OutFlag}, DL);
}
+static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT OrigVT = Op.getValueType();
+ assert((OrigVT == MVT::i32 || OrigVT == MVT::i64) &&
+ "lowerIntNeonIntrinsic expects 32/64-bit scalar operation.");
+
+ EVT NodeVT = (OrigVT == MVT::i32) ? MVT::f32 : MVT::f64;
+
+ SmallVector<SDValue, 2> NewOps;
+ NewOps.reserve(Op.getNumOperands() - 1);
+
+ for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+ NewOps.push_back(DAG.getBitcast(NodeVT, Op.getOperand(I)));
+
+ SDValue OpNode = DAG.getNode(Opcode, DL, NodeVT, NewOps);
+ return DAG.getBitcast(OrigVT, OpNode);
+}
+
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
@@ -6359,26 +6378,45 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
+ case Intrinsic::aarch64_neon_sqrshl:
+ if (Op.getValueType().isVector())
+ return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHL, DAG);
+ case Intrinsic::aarch64_neon_sqshl:
+ if (Op.getValueType().isVector())
+ return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHL, DAG);
+ case Intrinsic::aarch64_neon_uqrshl:
+ if (Op.getValueType().isVector())
+ return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHL, DAG);
+ case Intrinsic::aarch64_neon_uqshl:
+ if (Op.getValueType().isVector())
+ return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHL, DAG);
case Intrinsic::aarch64_neon_sqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQADD, DAG);
+
case Intrinsic::aarch64_neon_sqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SSUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSUB, DAG);
+
case Intrinsic::aarch64_neon_uqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQADD, DAG);
case Intrinsic::aarch64_neon_uqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSUB, DAG);
+
case Intrinsic::aarch64_sve_whilelt:
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
/*IsEqual=*/false);
@@ -6713,6 +6751,52 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::experimental_vector_match: {
return LowerVectorMatch(Op, DAG);
}
+ // case Intrinsic::aarch64_neon_fcvtas:
+ // case Intrinsic::aarch64_neon_fcvtau:
+ // case Intrinsic::aarch64_neon_fcvtms:
+ // case Intrinsic::aarch64_neon_fcvtmu:
+ // case Intrinsic::aarch64_neon_fcvtns:
+ // case Intrinsic::aarch64_neon_fcvtnu:
+ // case Intrinsic::aarch64_neon_fcvtps:
+ // case Intrinsic::aarch64_neon_fcvtpu:
+ // case Intrinsic::aarch64_neon_fcvtzs:
+ // case Intrinsic::aarch64_neon_fcvtzu:
+ // case Intrinsic::aarch64_neon_sqabs:
+ // case Intrinsic::aarch64_neon_sqneg:
+ // case Intrinsic::aarch64_neon_scalar_sqxtn:
+ // case Intrinsic::aarch64_neon_scalar_sqxtun:
+ // case Intrinsic::aarch64_neon_scalar_uqxtn:
+ // case Intrinsic::aarch64_neon_sqadd:
+ // case Intrinsic::aarch64_neon_sqdmulh:
+ // case Intrinsic::aarch64_neon_sqrdmulh:
+ // case Intrinsic::aarch64_neon_sqrshl:
+ // case Intrinsic::aarch64_neon_sqshl:
+ // case Intrinsic::aarch64_neon_sqshlu:
+ // case Intrinsic::aarch64_neon_sqsub:
+ // case Intrinsic::aarch64_neon_srshl:
+ // case Intrinsic::aarch64_neon_sshl:
+ // case Intrinsic::aarch64_neon_suqadd:
+ // case Intrinsic::aarch64_neon_uqadd:
+ // case Intrinsic::aarch64_neon_uqrshl:
+ // case Intrinsic::aarch64_neon_uqshl:
+ // case Intrinsic::aarch64_neon_uqsub:
+ // case Intrinsic::aarch64_neon_urshl:
+ // case Intrinsic::aarch64_neon_ushl:
+ // case Intrinsic::aarch64_neon_usqadd:
+ // case Intrinsic::aarch64_neon_rshrn:
+ // case Intrinsic::aarch64_neon_sqrshrn:
+ // case Intrinsic::aarch64_neon_sqrshrun:
+ // case Intrinsic::aarch64_neon_sqshrn:
+ // case Intrinsic::aarch64_neon_sqshrun:
+ // case Intrinsic::aarch64_neon_uqrshrn:
+ // case Intrinsic::aarch64_neon_uqshrn:
+ // case Intrinsic::aarch64_neon_sqdmulh_lane:
+ // case Intrinsic::aarch64_neon_sqdmulh_laneq:
+ // case Intrinsic::aarch64_neon_sqrdmulh_lane:
+ // case Intrinsic::aarch64_neon_sqrdmulh_laneq:
+ // case Intrinsic::aarch64_neon_sqrdmlah:
+ // case Intrinsic::aarch64_neon_sqrdmlsh:
+ // case Intrinsic::aarch64_neon_abs:{
}
}
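
For reference, a minimal IR sketch (not part of the patch; function and value names are hypothetical, and the expected instructions are approximate) of the scalar case this helper targets. The i32/i64 intrinsic operands are bitcast to f32/f64, the operation is emitted as the float-typed AArch64ISD node, and the result is bitcast back, so the value stays in a SIMD/FP register instead of round-tripping through a GPR:

; Illustrative only: a scalar i32 saturating add via the NEON intrinsic.
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)

define i32 @example_sqadd_i32(i32 %a, i32 %b) {
  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
  ret i32 %res
}
; With this change the call is lowered roughly as
;   (i32 (bitcast (AArch64ISD::SQADD (f32 (bitcast %a)), (f32 (bitcast %b)))))
; which should select to something like:
;   fmov  s0, w0
;   fmov  s1, w1
;   sqadd s0, s0, s1
;   fmov  w0, s0
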
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d3514d1a99..28314d3aa7fac 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7703,16 +7703,21 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
}
multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode, SDPatternOperator SatOp> {
+ SDPatternOperator OpNode, SDPatternOperator G_OpNode, SDPatternOperator SatOp> {
def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
- def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ def : Pat<(i64 (G_OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
- def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+ def : Pat<(i32 (G_OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+ (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+
+ def : Pat<(f64 (OpNode FPR64:$Rn, FPR64:$Rm)),
+ (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
+ def : Pat<(f32 (OpNode FPR32:$Rn, FPR32:$Rm)),
(!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f788c7510f80c..3cc75ff43f7a3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1000,6 +1000,25 @@ def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>;
def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>;
def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>;
+def AArch64sqadd_node: SDNode<"AArch64ISD::SQADD", SDTFPBinOp>;
+def AArch64sqrshl: SDNode<"AArch64ISD::SQRSHL", SDTFPBinOp>;
+def AArch64sqshl: SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>;
+def AArch64sqsub_node: SDNode<"AArch64ISD::SQSUB", SDTFPBinOp>;
+def AArch64uqadd: SDNode<"AArch64ISD::UQADD", SDTFPBinOp>;
+def AArch64uqrshl: SDNode<"AArch64ISD::UQRSHL", SDTFPBinOp>;
+def AArch64uqshl: SDNode<"AArch64ISD::UQSHL", SDTFPBinOp>;
+def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
+
+// These PatFrags are a temporary hack to work around pattern-matching issues with intrinsics that have not yet been updated.
+def AArch64sqadd: PatFrags<(ops node:$lhs, node:$rhs),
+ [(bitconvert (AArch64sqadd_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))),
+ (bitconvert (AArch64sqadd_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))),
+ (int_aarch64_neon_sqadd node:$lhs, node:$rhs)]>;
+def AArch64sqsub: PatFrags<(ops node:$lhs, node:$rhs),
+ [(bitconvert (AArch64sqsub_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))),
+ (bitconvert (AArch64sqsub_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))),
+ (int_aarch64_neon_sqsub node:$lhs, node:$rhs)]>;
+
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
// Vector immediate ops
@@ -6453,19 +6472,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
-defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>;
+defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", AArch64sqadd_node, int_aarch64_neon_sqadd, saddsat>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
-defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
-defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
-defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>;
+defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", AArch64sqrshl, int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", AArch64sqshl, int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", AArch64sqsub_node, int_aarch64_neon_sqsub, ssubsat>;
defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
-defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>;
-defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
-defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
-defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>;
+defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", AArch64uqadd, int_aarch64_neon_uqadd, uaddsat>;
+defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", AArch64uqrshl, int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", AArch64uqshl, int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", AArch64uqsub, int_aarch64_neon_uqsub, usubsat>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
@@ -6520,11 +6539,11 @@ defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
-def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
+def : Pat<(i64 (AArch64sqadd (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
+def : Pat<(i64 (AArch64sqsub (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
@@ -8545,9 +8564,9 @@ defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
- int_aarch64_neon_sqadd>;
+ AArch64sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
- int_aarch64_neon_sqsub>;
+ AArch64sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
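
To illustrate why SQDMLAL/SQDMLSL now take the PatFrags: the fused accumulate patterns must keep matching both the plain intrinsic form (still produced by paths that are not yet updated) and the new bitcast form produced by the SelectionDAG lowering above. A sketch of the kind of IR that should still fuse (hypothetical names; the exact register assignment is approximate):

; Illustrative only: saturating accumulate of a scalar sqdmull result.
declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32)
declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)

define i64 @example_sqdmlal_d(i32 %a, i32 %b, i64 %acc) {
  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
  %res  = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %acc, i64 %prod)
  ret i64 %res
}
; Expected to still select to a single fused multiply-accumulate, roughly
;   sqdmlal dN, sA, sB
; (plus the fmov moves into and out of the FP registers), rather than a
; separate sqdmull followed by sqadd.
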
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
new file mode 100644
index 0000000000000..819c00cdd6815
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -0,0 +1,225 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqrshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
+
+define i32 @test_sqrshl_s32(float noundef %a){
+; CHECK-LABEL: test_sqrshl_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqrshl s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_sqrshl_s64(float noundef %a){
+; CHECK-LABEL: test_sqrshl_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshl d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_sqshl_s32(float noundef %a) {
+; CHECK-LABEL: test_sqshl_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqshl s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshl.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_sqshl_s64(float noundef %a) {
+; CHECK-LABEL: test_sqshl_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshl d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_uqrshl_s32(float noundef %a) {
+; CHECK-LABEL: test_uqrshl_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: uqrshl s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_uqrshl_s64(float noundef %a) {
+; CHECK-LABEL: test_uqrshl_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqrshl d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_uqshl_s32(float noundef %a) {
+; CHECK-LABEL: test_uqshl_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: uqshl s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqshl.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_uqshl_s64(float noundef %a) {
+; CHECK-LABEL: test_uqshl_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqshl d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_sqadd_s32(float noundef %a) {
+; CHECK-LABEL: test_sqadd_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqadd s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_sqadd_s64(float noundef %a) {
+; CHECK-LABEL: test_sqadd_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqadd d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_sqsub_s32(float noundef %a) {
+; CHECK-LABEL: test_sqsub_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: sqsub s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_sqsub_s64(float noundef %a) {
+; CHECK-LABEL: test_sqsub_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqsub d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_uqadd_s32(float noundef %a) {
+; CHECK-LABEL: test_uqadd_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: uqadd s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_uqadd_s64(float noundef %a) {
+; CHECK-LABEL: test_uqadd_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqadd d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
+
+define i32 @test_uqsub_s32(float noundef %a) {
+; CHECK-LABEL: test_uqsub_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: uqsub s0, s0, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %cvt, i32 %cvt)
+ ret i32 %res
+}
+
+define i64 @test_uqsub_s64(float noundef %a) {
+; CHECK-LABEL: test_uqsub_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqsub d0, d0, d0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %cvt, i64 %cvt)
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..fed7439bf95fb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -1766,24 +1766,14 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: mov w8, v0.s[1]
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: sqadd s0, s0, s1
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: mov s0, v0.s[1]
-; CHECK-GI-NEXT: sqadd s0, s1, s0
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqadd_lane1_sqdmull4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: sqadd s0, s1, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
%prod = extractelement <4 x i32> %prod.vec, i32 1
%res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
@@ -1791,24 +1781,14 @@ define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
}
define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: mov w8, v0.s[1]
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: sqsub s0, s0, s1
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: mov s0, v0.s[1]
-; CHECK-GI-NEXT: sqsub s0, s1, s0
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqsub_lane1_sqdmull4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: sqsub s0, s1, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
%prod = extractelement <4 x i32> %prod.vec, i32 1
%res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 8ec5434085d6a..d27e2e69f8605 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -168,10 +168,8 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl_scalar:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -363,10 +361,8 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl_scalar:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -888,10 +884,8 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl_scalar:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -904,10 +898,9 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: sqrshl_scalar_constant:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov x8, #1 // =0x1
+; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -997,10 +990,8 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl_scalar:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
@@ -1013,10 +1004,9 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: uqrshl_scalar_constant:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: mov x8, #1 // =0x1
+; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret