[llvm] [AArch64] Add lowering for NEON saturating shift intrinsics (PR #171485)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 10:11:57 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: None (Lukacma)
Changes
This patch extends the work done in #161840 and adds bitcast-based lowering for the scalar NEON saturating shift intrinsics (sqshrn, sqshrun, uqshrn, sqrshrn, sqrshrun, uqrshrn). The integer operands are bitcast to f32/f64 so the scalar forms match the FPR-typed instruction patterns directly, while the shift amount is kept as an integer immediate.
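
To make the shape of that lowering concrete, here is a minimal sketch for `@llvm.aarch64.neon.sqshrn.i32`, simplified from `lowerIntNeonIntrinsic` in the diff below. The DAG calls and `AArch64ISD::SQSHRN` are taken from the patch; the standalone helper name `lowerScalarSQSHRN` is illustrative only, and the real code handles all the narrowing-shift opcodes generically:

```cpp
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch only: lower i32 @llvm.aarch64.neon.sqshrn.i32(i64 %x, i32 N)
// by round-tripping through f64/f32 so the FPR-based scalar pattern
//   (set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))
// can select it. AArch64ISD is target-internal (AArch64ISelLowering.h).
static SDValue lowerScalarSQSHRN(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  // Operand 0 is the intrinsic ID; operand 1 is the i64 source value.
  SDValue Src = DAG.getBitcast(MVT::f64, Op.getOperand(1));
  // The last operand is the shift amount; it stays an integer
  // immediate (the IsLastInt = true case in the patch).
  SDValue Amt = Op.getOperand(2);
  // Build the node on the float result type, then bitcast back to i32.
  SDValue Res = DAG.getNode(AArch64ISD::SQSHRN, DL, MVT::f32, Src, Amt);
  return DAG.getBitcast(MVT::i32, Res);
}
```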
---
Patch is 23.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171485.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+17-9)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+12-9)
- (modified) llvm/test/CodeGen/AArch64/arm64-int-neon.ll (+92-2)
- (added) llvm/test/CodeGen/AArch64/arm64-int-neon.s (+325)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d1441a744eee8..815ed15ad4d1e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
}
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool IsLastInt = false) {
SDLoc DL(Op);
auto getFloatVT = [](EVT VT) {
assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
@@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
};
+
+ const unsigned NumOps = Op.getNumOperands();
+ const unsigned LastOpIdx = NumOps - 1;
SmallVector<SDValue, 2> NewOps;
- NewOps.reserve(Op.getNumOperands() - 1);
+ NewOps.reserve(NumOps - 1);
- for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+ // Skip first operand as it is intrinsic ID.
+ for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
+ SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
+ : bitcastToFloat(Op.getOperand(LastOpIdx));
+ NewOps.push_back(LastOp);
EVT OrigVT = Op.getValueType();
SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
return DAG.getBitcast(OrigVT, OpNode);
@@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VLSHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::URSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4d2e740779961..1bdf37dc4b2c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR64, vecshiftR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+ [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
let Inst{20-16} = imm{4-0};
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7ee094ad4ac87..819b84b48f7cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;
-
-//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
-
+def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
+def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
+def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
+def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
@@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
- int_aarch64_neon_sqrshrn>;
+ AArch64sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
- int_aarch64_neon_sqrshrun>;
+ AArch64sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
- int_aarch64_neon_sqshrn>;
+ AArch64sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
- int_aarch64_neon_sqshrun>;
+ AArch64sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
@@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
- int_aarch64_neon_uqrshrn>;
+ AArch64uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
- int_aarch64_neon_uqshrn>;
+ AArch64uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index e8ae8a3e53c9b..9b530534d00f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,7 +3,13 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
@@ -113,6 +119,90 @@ entry:
ret i64 %res
}
+define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
define i32 @test_sqadd_s32(float noundef %a) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
@@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
%cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
new file mode 100644
index 0000000000000..4599c60e82703
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
@@ -0,0 +1,325 @@
+ .file "arm64-int-neon.ll"
+ .text
+ .globl test_sqrshl_s32 // -- Begin function test_sqrshl_s32
+ .p2align 2
+ .type test_sqrshl_s32,@function
+test_sqrshl_s32: // @test_sqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end0:
+ .size test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshl_s64 // -- Begin function test_sqrshl_s64
+ .p2align 2
+ .type test_sqrshl_s64,@function
+test_sqrshl_s64: // @test_sqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end1:
+ .size test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s32 // -- Begin function test_sqshl_s32
+ .p2align 2
+ .type test_sqshl_s32,@function
+test_sqshl_s32: // @test_sqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end2:
+ .size test_sqshl_s32, .Lfunc_end2-test_sqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s64 // -- Begin function test_sqshl_s64
+ .p2align 2
+ .type test_sqshl_s64,@function
+test_sqshl_s64: // @test_sqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end3:
+ .size test_sqshl_s64, .Lfunc_end3-test_sqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s32 // -- Begin function test_uqrshl_s32
+ .p2align 2
+ .type test_uqrshl_s32,@function
+test_uqrshl_s32: // @test_uqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end4:
+ .size test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s64 // -- Begin function test_uqrshl_s64
+ .p2align 2
+ .type test_uqrshl_s64,@function
+test_uqrshl_s64: // @test_uqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end5:
+ .size test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s32 // -- Begin function test_uqshl_s32
+ .p2align 2
+ .type test_uqshl_s32,@function
+test_uqshl_s32: // @test_uqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end6:
+ .size test_uqshl_s32, .Lfunc_end6-test_uqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s64 // -- Begin function test_uqshl_s64
+ .p2align 2
+ .type test_uqshl_s64,@function
+test_uqshl_s64: // @test_uqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end7:
+ .size test_uqshl_s64, .Lfunc_end7-test_uqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrn_s32 // -- Begin function test_sqshrn_s32
+ .p2align 2
+ .type test_sqshrn_s32,@function
+test_sqshrn_s32: // @test_sqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end8:
+ .size test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrun_s32 // -- Begin function test_sqshrun_s32
+ .p2align 2
+ .type test_sqshrun_s32,@function
+test_sqshrun_s32: // @test_sqshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end9:
+ .size test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshrn_s32 // -- Begin function test_uqshrn_s32
+ .p2align 2
+ .type test_uqshrn_s32,@function
+test_uqshrn_s32: // @test_uqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end10:
+ .size test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrn_s32 // -- Begin function test_sqrshrn_s32
+ .p2align 2
+ .type test_sqrshrn_s32,@function
+test_sqrshrn_s32: // @test_sqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end11:
+ .size test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrun_s32 // -- Begin function test_sqrshrun_s32
+ .p2align 2
+ .type test_sqrshrun_s32,@function
+test_sqrshrun_s32: // @test_sqrshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end12:
+ .size test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshrn_s32 // -- Begin function test_uqrshrn_s32
+ .p2align 2
+ .type test_uqrshrn_s32,@function
+test_uqrshrn_s32: // @test_uqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end13:
+ .size test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s32 // -- Begin function test_sqadd_s32
+ .p2align 2
+ .type test_sqadd_s32,@function
+test_sqadd_s32: // @test_sqadd_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqadd s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end14:
+ .size test_sqadd_s32, .Lfunc_end14-test_sqadd_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s64 // -- Begin function test_sqadd_s64
+ .p2align 2
+ .type test_sqadd_s64,@function
+test_sqadd_s64: // @test_sqadd_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqadd d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end15:
+ .size test_sqadd_s64, .Lfunc_end15-test_sqadd_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqsub_s32 // -- B...
[truncated]
``````````
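
For reference, the scalar form exercised by the new tests corresponds to the ACLE scalar narrowing intrinsics in `arm_neon.h`. A hedged source-level example (assuming the usual mapping of `vqshrns_n_s64` to `@llvm.aarch64.neon.sqshrn.i32`):

```cpp
// Illustrative only: assumes the standard ACLE mapping. With this patch
// the conversion and the narrowing shift should stay in the FP/SIMD
// register file (cf. the new CHECK lines: fcvtzs d0, s0 followed by
// sqshrn s0, d0, #1) rather than moving the value through GPRs.
#include <arm_neon.h>
#include <cstdint>

int32_t narrow_sat(float a) {
  // Scalar signed saturating shift right narrow, i64 -> i32, shift #1;
  // the shift amount must be a constant in [1, 32].
  int64_t v = vcvtds_s64_f32(a); // assumed fcvtzs-style conversion intrinsic
  return vqshrns_n_s64(v, 1);
}
```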
https://github.com/llvm/llvm-project/pull/171485
More information about the llvm-commits mailing list