[llvm] [AArch64][GlobalISel] Combine to sqxtn pre legalization for FewerElements (PR #181163)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 12 07:12:46 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
Post-legalization we will not have v2i64 MIN and MAX, which prevents the recognition of saturating truncates. This changes the combiner rules to combine pre-legalization, provided that the vector operation will be clamped (as a v4i64 would be).
---
Patch is 42.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181163.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+4)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+13-3)
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+3)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-1)
- (modified) llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll (+82-372)
- (modified) llvm/test/CodeGen/AArch64/qmovn.ll (+55-97)
- (modified) llvm/test/CodeGen/AArch64/qshrn.ll (+18-62)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index da53005ed801e..6e5c051551746 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -157,6 +157,10 @@ class CombinerHelper {
/// perform WidenScalar action on the target.
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const;
+ /// \return true if \p Query is legal on the target, or if \p Query will
+ /// perform a FewerElements action on the target.
+ bool isLegalOrHasFewerElements(const LegalityQuery &Query) const;
+
/// \return true if the combine is running prior to legalization, or if \p Ty
/// is a legal integer constant type on the target.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b9273d388ea70..2a5c6ef467483 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -168,6 +168,13 @@ bool CombinerHelper::isLegalOrHasWidenScalar(const LegalityQuery &Query) const {
LI->getAction(Query).Action == LegalizeActions::WidenScalar;
}
+bool CombinerHelper::isLegalOrHasFewerElements(
+ const LegalityQuery &Query) const {
+ LegalizeAction Action = LI->getAction(Query).Action;
+ return Action == LegalizeActions::Legal ||
+ Action == LegalizeActions::FewerElements;
+}
+
bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
if (!Ty.isVector())
return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
@@ -6047,7 +6054,8 @@ bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
- if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
+ if (!LI || !isLegalOrHasFewerElements(
+ {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
return false;
APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
@@ -6079,7 +6087,8 @@ bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
- if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
+ if (!LI || !isLegalOrHasFewerElements(
+ {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
return false;
APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
return mi_match(Src, MRI,
@@ -6111,7 +6120,8 @@ bool CombinerHelper::matchTruncUSatU(MachineInstr &MI,
unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
- if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
+ if (!LI || !isLegalOrHasFewerElements(
+ {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
return false;
APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e6eec3194b716..e30958397803d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5648,6 +5648,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STRICT_FMA:
case G_STRICT_FLDEXP:
case G_FFREXP:
+ case G_TRUNC_SSAT_S:
+ case G_TRUNC_SSAT_U:
+ case G_TRUNC_USAT_U:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2ed567a1052ca..f04824a238560 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -810,7 +810,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.alwaysLegal();
getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
- .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
+ .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
+ .clampNumElements(0, v2s32, v2s32);
getActionDefinitionsBuilder(G_SEXT_INREG)
.legalFor({s32, s64})
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index fd7c869fe2f92..6d94bb08f2e79 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -99,42 +99,20 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI3_1
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI3_0
; CHECK-CVT-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-CVT-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-CVT-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-CVT-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stest_f32i32:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI3_1
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI3_0
; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-FP16-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -147,43 +125,10 @@ entry:
}
define <4 x i32> @utest_f32i32(<4 x float> %x) {
-; CHECK-CVT-SD-LABEL: utest_f32i32:
-; CHECK-CVT-SD: // %bb.0: // %entry
-; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: utest_f32i32:
-; CHECK-FP16-SD: // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-CVT-GI-LABEL: utest_f32i32:
-; CHECK-CVT-GI: // %bb.0: // %entry
-; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-GI-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-CVT-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
-; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: utest_f32i32:
-; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: utest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -205,38 +150,22 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
;
; CHECK-CVT-GI-LABEL: ustest_f32i32:
; CHECK-CVT-GI: // %bb.0: // %entry
-; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-CVT-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-CVT-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-CVT-GI-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-CVT-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v3.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-CVT-GI-NEXT: sqxtun v0.2s, v1.2d
+; CHECK-CVT-GI-NEXT: sqxtun2 v0.4s, v2.2d
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: ustest_f32i32:
; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: sqxtun v0.2s, v1.2d
+; CHECK-FP16-GI-NEXT: sqxtun2 v0.4s, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -264,45 +193,23 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-CVT-GI-LABEL: stest_f16i32:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI6_1
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI6_0
; CHECK-CVT-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-CVT-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-CVT-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0]
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-CVT-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-CVT-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_1
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_0
; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0]
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-FP16-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -315,47 +222,11 @@ entry:
}
define <4 x i32> @utest_f16i32(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utest_f16i32:
-; CHECK-CVT-SD: // %bb.0: // %entry
-; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: utest_f16i32:
-; CHECK-FP16-SD: // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-CVT-GI-LABEL: utest_f16i32:
-; CHECK-CVT-GI: // %bb.0: // %entry
-; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-CVT-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
-; CHECK-CVT-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: utest_f16i32:
-; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
-; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: utest_f16i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -380,39 +251,23 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-CVT-GI-LABEL: ustest_f16i32:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-CVT-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-CVT-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-CVT-GI-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-CVT-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v3.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-CVT-GI-NEXT: sqxtun v0.2s, v1.2d
+; CHECK-CVT-GI-NEXT: sqxtun2 v0.4s, v2.2d
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: ustest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b
-; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0
-; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b
-; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: sqxtun v0.2s, v1.2d
+; CHECK-FP16-GI-NEXT: sqxtun2 v0.4s, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
@@ -2071,42 +1926,20 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-CVT-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI30_1
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_1]
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-CVT-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-CVT-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0]
-; CHECK-CVT-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-CVT-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-CVT-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-CVT-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-CVT-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-CVT-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stest_f32i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s
; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI30_1
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_1]
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d
-; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0]
-; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d
-; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b
-; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v0.2d
+; CHECK-FP16-GI-NEXT: sqxtn v0.2s, v1.2d
+; CHECK-FP16-GI-NEXT: sqxtn2 v0.4s, v2.2d
; CHECK-FP16-GI-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -2117,43 +1950,10 @@ entry:
}
define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
-; CHECK-CVT-SD-LABEL: utest_f32i32_mm:
-; CHECK-CVT-SD: // %bb.0: // %entry
-; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-SD-NEXT: ret
-;...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/181163
More information about the llvm-commits mailing list