[llvm] 57356d6 - [DAG] Create fptoui.sat from clamped fptoui
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 26 00:37:48 PST 2022
Author: David Green
Date: 2022-01-26T08:37:44Z
New Revision: 57356d6bb72adb1e85eb8b9ac5d9321b84e22fe6
URL: https://github.com/llvm/llvm-project/commit/57356d6bb72adb1e85eb8b9ac5d9321b84e22fe6
DIFF: https://github.com/llvm/llvm-project/commit/57356d6bb72adb1e85eb8b9ac5d9321b84e22fe6.diff
LOG: [DAG] Create fptoui.sat from clamped fptoui
This is the unsigned variant of D111976, where we convert a clamped
fptoui to a fptoui.sat. Because we are unsigned, the condition this time
is only UMIN of UINT_MAX. Similarly to D111976 it handles ISD::UMIN,
ISD::SETCC/ISD::SELECT, ISD::VSELECT or ISD::SELECT_CC nodes.
This especially helps on ARM/AArch64 where the vcvt instructions
naturally saturate the result.
Differential Revision: https://reviews.llvm.org/D114964
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/fpclamptosat.ll
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
llvm/test/CodeGen/ARM/fpclamptosat.ll
llvm/test/CodeGen/RISCV/fpclamptosat.ll
llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 082e2508aa4b..7e3dbb91f514 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4892,6 +4892,42 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
: DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
}
+static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
+ // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
+ // be truncated versions of the setcc operands (N0/N1).
+ if ((N0 != N2 &&
+ (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
+ N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
+ return SDValue();
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return SDValue();
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C3 = N3C->getAPIntValue();
+ if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
+ C1 != C3.zextOrSelf(C1.getBitWidth()))
+ return SDValue();
+
+ unsigned BW = (C1 + 1).exactLogBase2();
+ EVT FPVT = N0.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+ FPVT, NewVT))
+ return SDValue();
+
+ SDValue Sat =
+ DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
+}
+
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4934,6 +4970,9 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformMinMaxFpToSatCombine(
N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
return S;
+ if (Opcode == ISD::UMIN)
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
+ return S;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -10314,6 +10353,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
return S;
+ if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
@@ -23372,6 +23413,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
return S;
+ if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index ca399acc5448..829b4ddbcc5c 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -22,10 +22,7 @@ entry:
define i32 @utest_f64i32(double %x) {
; CHECK-LABEL: utest_f64i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csinv w0, w8, wzr, lo
+; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: ret
entry:
%conv = fptoui double %x to i64
@@ -68,10 +65,7 @@ entry:
define i32 @utest_f32i32(float %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, s0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csinv w0, w8, wzr, lo
+; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: ret
entry:
%conv = fptoui float %x to i64
@@ -121,18 +115,12 @@ define i32 @utesth_f16i32(half %x) {
; CHECK-CVT-LABEL: utesth_f16i32:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #-1
-; CHECK-CVT-NEXT: fcvtzu x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csinv w0, w8, wzr, lo
+; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: utesth_f16i32:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: fcvtzu x8, h0
-; CHECK-FP16-NEXT: mov w9, #-1
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csinv w0, w8, wzr, lo
+; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
entry:
%conv = fptoui half %x to i64
@@ -581,11 +569,7 @@ entry:
define i32 @utest_f64i32_mm(double %x) {
; CHECK-LABEL: utest_f64i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x0, x8, x9, lo
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: ret
entry:
%conv = fptoui double %x to i64
@@ -623,11 +607,7 @@ entry:
define i32 @utest_f32i32_mm(float %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzu x8, s0
-; CHECK-NEXT: mov w9, #-1
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x0, x8, x9, lo
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: ret
entry:
%conv = fptoui float %x to i64
@@ -672,20 +652,12 @@ define i32 @utesth_f16i32_mm(half %x) {
; CHECK-CVT-LABEL: utesth_f16i32_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #-1
-; CHECK-CVT-NEXT: fcvtzu x8, s0
-; CHECK-CVT-NEXT: cmp x8, x9
-; CHECK-CVT-NEXT: csel x0, x8, x9, lo
-; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: utesth_f16i32_mm:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: fcvtzu x8, h0
-; CHECK-FP16-NEXT: mov w9, #-1
-; CHECK-FP16-NEXT: cmp x8, x9
-; CHECK-FP16-NEXT: csel x0, x8, x9, lo
-; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
entry:
%conv = fptoui half %x to i64
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index b66ba95daf80..5d06f762a771 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -27,12 +27,12 @@ entry:
define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-LABEL: utest_f64i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: mov d1, v0.d[1]
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fcvtzu w8, d1
+; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -80,18 +80,7 @@ entry:
define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl2 v2.2d, v0.4s
-; CHECK-NEXT: fcvtl v0.2d, v0.2s
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -133,57 +122,11 @@ entry:
}
define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i32:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[3]
-; CHECK-CVT-NEXT: mov h4, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzu x9, s0
-; CHECK-CVT-NEXT: fcvtzu x8, s2
-; CHECK-CVT-NEXT: fcvt s2, h4
-; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzu x8, s3
-; CHECK-CVT-NEXT: fmov d3, x9
-; CHECK-CVT-NEXT: fcvtzu x9, s2
-; CHECK-CVT-NEXT: mov v0.d[1], x8
-; CHECK-CVT-NEXT: mov v3.d[1], x9
-; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: utesth_f16i32:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu x9, h0
-; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT: fcvtzu x8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzu x8, h3
-; CHECK-FP16-NEXT: fmov d3, x9
-; CHECK-FP16-NEXT: fcvtzu x9, h2
-; CHECK-FP16-NEXT: mov v0.d[1], x8
-; CHECK-FP16-NEXT: mov v3.d[1], x9
-; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -338,17 +281,22 @@ entry:
}
define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-CVT-LABEL: utesth_f16i16:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
%0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -758,12 +706,12 @@ entry:
define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: mov d1, v0.d[1]
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fcvtzu w8, d1
+; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -806,18 +754,7 @@ entry:
define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl2 v2.2d, v0.4s
-; CHECK-NEXT: fcvtl v0.2d, v0.2s
-; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: fcvtzu v2.2d, v2.2d
-; CHECK-NEXT: fcvtzu v0.2d, v0.2d
-; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d
-; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -854,57 +791,11 @@ entry:
}
define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i32_mm:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[3]
-; CHECK-CVT-NEXT: mov h4, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzu x9, s0
-; CHECK-CVT-NEXT: fcvtzu x8, s2
-; CHECK-CVT-NEXT: fcvt s2, h4
-; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzu x8, s3
-; CHECK-CVT-NEXT: fmov d3, x9
-; CHECK-CVT-NEXT: fcvtzu x9, s2
-; CHECK-CVT-NEXT: mov v0.d[1], x8
-; CHECK-CVT-NEXT: mov v3.d[1], x9
-; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h3, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu x9, h0
-; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT: fcvtzu x8, h2
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzu x8, h3
-; CHECK-FP16-NEXT: fmov d3, x9
-; CHECK-FP16-NEXT: fcvtzu x9, h2
-; CHECK-FP16-NEXT: mov v0.d[1], x8
-; CHECK-FP16-NEXT: mov v3.d[1], x9
-; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d
-; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-FP16-NEXT: ret
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -1044,17 +935,22 @@ entry:
}
define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-CVT-LABEL: utesth_f16i16_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
%spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index e8bf0ed47754..5223ae1286f5 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -104,17 +104,23 @@ define i32 @utest_f64i32(double %x) {
; SOFT-NEXT: .LBB1_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
;
-; VFP-LABEL: utest_f64i32:
-; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: vmov r0, r1, d0
-; VFP-NEXT: bl __aeabi_d2ulz
-; VFP-NEXT: subs.w r2, r0, #-1
-; VFP-NEXT: sbcs r1, r1, #0
-; VFP-NEXT: it hs
-; VFP-NEXT: movhs.w r0, #-1
-; VFP-NEXT: pop {r7, pc}
+; VFP2-LABEL: utest_f64i32:
+; VFP2: @ %bb.0: @ %entry
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: vmov r0, r1, d0
+; VFP2-NEXT: bl __aeabi_d2ulz
+; VFP2-NEXT: subs.w r2, r0, #-1
+; VFP2-NEXT: sbcs r1, r1, #0
+; VFP2-NEXT: it hs
+; VFP2-NEXT: movhs.w r0, #-1
+; VFP2-NEXT: pop {r7, pc}
+;
+; FULL-LABEL: utest_f64i32:
+; FULL: @ %bb.0: @ %entry
+; FULL-NEXT: vcvt.u32.f64 s0, d0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptoui double %x to i64
%0 = icmp ult i64 %conv, 4294967295
@@ -289,15 +295,9 @@ define i32 @utest_f32i32(float %x) {
;
; VFP-LABEL: utest_f32i32:
; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
+; VFP-NEXT: vcvt.u32.f32 s0, s0
; VFP-NEXT: vmov r0, s0
-; VFP-NEXT: bl __aeabi_f2ulz
-; VFP-NEXT: subs.w r2, r0, #-1
-; VFP-NEXT: sbcs r1, r1, #0
-; VFP-NEXT: it hs
-; VFP-NEXT: movhs.w r0, #-1
-; VFP-NEXT: pop {r7, pc}
+; VFP-NEXT: bx lr
entry:
%conv = fptoui float %x to i64
%0 = icmp ult i64 %conv, 4294967295
@@ -466,25 +466,16 @@ define i32 @utesth_f16i32(half %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: bl __aeabi_f2ulz
-; VFP2-NEXT: subs.w r2, r0, #-1
-; VFP2-NEXT: sbcs r1, r1, #0
-; VFP2-NEXT: it hs
-; VFP2-NEXT: movhs.w r0, #-1
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utesth_f16i32:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov.f16 r0, s0
-; FULL-NEXT: vmov s0, r0
-; FULL-NEXT: bl __fixunshfdi
-; FULL-NEXT: subs.w r2, r0, #-1
-; FULL-NEXT: sbcs r1, r1, #0
-; FULL-NEXT: it hs
-; FULL-NEXT: movhs.w r0, #-1
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptoui half %x to i64
%0 = icmp ult i64 %conv, 4294967295
@@ -2240,16 +2231,22 @@ define i32 @utest_f64i32_mm(double %x) {
; SOFT-NEXT: .LBB28_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
;
-; VFP-LABEL: utest_f64i32_mm:
-; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
-; VFP-NEXT: vmov r0, r1, d0
-; VFP-NEXT: bl __aeabi_d2ulz
-; VFP-NEXT: cmp r1, #0
-; VFP-NEXT: it ne
-; VFP-NEXT: movne.w r0, #-1
-; VFP-NEXT: pop {r7, pc}
+; VFP2-LABEL: utest_f64i32_mm:
+; VFP2: @ %bb.0: @ %entry
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: vmov r0, r1, d0
+; VFP2-NEXT: bl __aeabi_d2ulz
+; VFP2-NEXT: cmp r1, #0
+; VFP2-NEXT: it ne
+; VFP2-NEXT: movne.w r0, #-1
+; VFP2-NEXT: pop {r7, pc}
+;
+; FULL-LABEL: utest_f64i32_mm:
+; FULL: @ %bb.0: @ %entry
+; FULL-NEXT: vcvt.u32.f64 s0, d0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptoui double %x to i64
%spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
@@ -2429,14 +2426,9 @@ define i32 @utest_f32i32_mm(float %x) {
;
; VFP-LABEL: utest_f32i32_mm:
; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: .save {r7, lr}
-; VFP-NEXT: push {r7, lr}
+; VFP-NEXT: vcvt.u32.f32 s0, s0
; VFP-NEXT: vmov r0, s0
-; VFP-NEXT: bl __aeabi_f2ulz
-; VFP-NEXT: cmp r1, #0
-; VFP-NEXT: it ne
-; VFP-NEXT: movne.w r0, #-1
-; VFP-NEXT: pop {r7, pc}
+; VFP-NEXT: bx lr
entry:
%conv = fptoui float %x to i64
%spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
@@ -2609,23 +2601,16 @@ define i32 @utesth_f16i32_mm(half %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: bl __aeabi_f2ulz
-; VFP2-NEXT: cmp r1, #0
-; VFP2-NEXT: it ne
-; VFP2-NEXT: movne.w r0, #-1
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: utesth_f16i32_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: vmov.f16 r0, s0
-; FULL-NEXT: vmov s0, r0
-; FULL-NEXT: bl __fixunshfdi
-; FULL-NEXT: cmp r1, #0
-; FULL-NEXT: it ne
-; FULL-NEXT: movne.w r0, #-1
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vcvt.u32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bx lr
entry:
%conv = fptoui half %x to i64
%spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 69a646db09aa..4643048232d8 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -115,28 +115,28 @@ entry:
}
define i32 @utest_f64i32(double %x) {
-; RV32-LABEL: utest_f64i32:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixunsdfdi@plt
-; RV32-NEXT: beqz a1, .LBB1_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a1, .LBB1_3
-; RV32-NEXT: j .LBB1_4
-; RV32-NEXT: .LBB1_2:
-; RV32-NEXT: addi a1, a0, 1
-; RV32-NEXT: snez a1, a1
-; RV32-NEXT: bnez a1, .LBB1_4
-; RV32-NEXT: .LBB1_3: # %entry
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB1_4: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32IF-LABEL: utest_f64i32:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixunsdfdi@plt
+; RV32IF-NEXT: beqz a1, .LBB1_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: beqz a1, .LBB1_3
+; RV32IF-NEXT: j .LBB1_4
+; RV32IF-NEXT: .LBB1_2:
+; RV32IF-NEXT: addi a1, a0, 1
+; RV32IF-NEXT: snez a1, a1
+; RV32IF-NEXT: bnez a1, .LBB1_4
+; RV32IF-NEXT: .LBB1_3: # %entry
+; RV32IF-NEXT: li a0, -1
+; RV32IF-NEXT: .LBB1_4: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
;
; RV64IF-LABEL: utest_f64i32:
; RV64IF: # %bb.0: # %entry
@@ -155,6 +155,24 @@ define i32 @utest_f64i32(double %x) {
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
+; RV32IFD-LABEL: utest_f64i32:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: bnez a0, .LBB1_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB1_2:
+; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
; RV64IFD-LABEL: utest_f64i32:
; RV64IFD: # %bb.0: # %entry
; RV64IFD-NEXT: fmv.d.x ft0, a0
@@ -319,25 +337,14 @@ entry:
define i32 @utest_f32i32(float %x) {
; RV32-LABEL: utest_f32i32:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixunssfdi@plt
-; RV32-NEXT: beqz a1, .LBB4_2
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: bnez a0, .LBB4_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a1, .LBB4_3
-; RV32-NEXT: j .LBB4_4
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
; RV32-NEXT: .LBB4_2:
-; RV32-NEXT: addi a1, a0, 1
-; RV32-NEXT: snez a1, a1
-; RV32-NEXT: bnez a1, .LBB4_4
-; RV32-NEXT: .LBB4_3: # %entry
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB4_4: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
; RV32-NEXT: ret
;
; RV64-LABEL: utest_f32i32:
@@ -2071,20 +2078,20 @@ entry:
}
define i32 @utest_f64i32_mm(double %x) {
-; RV32-LABEL: utest_f64i32_mm:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixunsdfdi@plt
-; RV32-NEXT: beqz a1, .LBB28_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB28_2: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32IF-LABEL: utest_f64i32_mm:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixunsdfdi@plt
+; RV32IF-NEXT: beqz a1, .LBB28_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: li a0, -1
+; RV32IF-NEXT: .LBB28_2: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
;
; RV64IF-LABEL: utest_f64i32_mm:
; RV64IF: # %bb.0: # %entry
@@ -2103,6 +2110,24 @@ define i32 @utest_f64i32_mm(double %x) {
; RV64IF-NEXT: addi sp, sp, 16
; RV64IF-NEXT: ret
;
+; RV32IFD-LABEL: utest_f64i32_mm:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: bnez a0, .LBB28_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB28_2:
+; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
; RV64IFD-LABEL: utest_f64i32_mm:
; RV64IFD: # %bb.0: # %entry
; RV64IFD-NEXT: fmv.d.x ft0, a0
@@ -2266,17 +2291,14 @@ entry:
define i32 @utest_f32i32_mm(float %x) {
; RV32-LABEL: utest_f32i32_mm:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: call __fixunssfdi@plt
-; RV32-NEXT: beqz a1, .LBB31_2
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: bnez a0, .LBB31_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB31_2: # %entry
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB31_2:
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
; RV32-NEXT: ret
;
; RV64-LABEL: utest_f32i32_mm:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 08bcba9b5cd7..1e9fbe09539c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -183,58 +183,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, r4, d9
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: csetm r2, lo
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: csetm r0, lo
-; CHECK-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmov r0, r4, d8
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: vpsel q6, q0, q5
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: csetm r2, lo
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r7, r2, #0, #8
-; CHECK-NEXT: csetm r0, lo
-; CHECK-NEXT: bfi r7, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r7
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov.f32 s1, s2
-; CHECK-NEXT: vmov.f32 s2, s24
-; CHECK-NEXT: vmov.f32 s3, s26
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: bx lr
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1396,58 +1346,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vmov r0, r4, d9
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: csetm r2, lo
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: csetm r0, lo
-; CHECK-NEXT: vmov.i64 q5, #0xffffffff
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmov r0, r4, d8
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: vmsr p0, r3
-; CHECK-NEXT: movs r7, #0
-; CHECK-NEXT: vpsel q6, q0, q5
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r6, r1
-; CHECK-NEXT: bl __aeabi_f2ulz
-; CHECK-NEXT: subs.w r2, r5, #-1
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT: sbcs r2, r6, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT: csetm r2, lo
-; CHECK-NEXT: subs.w r0, r0, #-1
-; CHECK-NEXT: sbcs r0, r1, #0
-; CHECK-NEXT: bfi r7, r2, #0, #8
-; CHECK-NEXT: csetm r0, lo
-; CHECK-NEXT: bfi r7, r0, #8, #8
-; CHECK-NEXT: vmsr p0, r7
-; CHECK-NEXT: vpsel q0, q0, q5
-; CHECK-NEXT: vmov.f32 s1, s2
-; CHECK-NEXT: vmov.f32 s2, s24
-; CHECK-NEXT: vmov.f32 s3, s26
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: vcvt.u32.f32 q0, q0
+; CHECK-NEXT: bx lr
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index dabcae9fd359..f8eeb2105e32 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -23,17 +23,9 @@ entry:
define i32 @utest_f64i32(double %x) {
; CHECK-LABEL: utest_f64i32:
; CHECK: .functype utest_f64i32 (f64) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f64_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i64
@@ -80,17 +72,9 @@ entry:
define i32 @utest_f32i32(float %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: .functype utest_f32i32 (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui float %x to i64
@@ -139,19 +123,11 @@ entry:
define i32 @utesth_f16i32(half %x) {
; CHECK-LABEL: utesth_f16i32:
; CHECK: .functype utesth_f16i32 (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui half %x to i64
@@ -810,17 +786,9 @@ entry:
define i32 @utest_f64i32_mm(double %x) {
; CHECK-LABEL: utest_f64i32_mm:
; CHECK: .functype utest_f64i32_mm (f64) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f64_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f64_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui double %x to i64
@@ -862,17 +830,9 @@ entry:
define i32 @utest_f32i32_mm(float %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: .functype utest_f32i32_mm (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui float %x to i64
@@ -916,19 +876,11 @@ entry:
define i32 @utesth_f16i32_mm(half %x) {
; CHECK-LABEL: utesth_f16i32_mm:
; CHECK: .functype utesth_f16i32_mm (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui half %x to i64
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index a595ffe51e2e..c27da85cbf48 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -145,62 +145,9 @@ entry:
define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: .functype utest_f32i32 (v128) -> (v128)
-; CHECK-NEXT: .local i64, i64, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 0
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 1
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 3
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -267,16 +214,7 @@ entry:
define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-LABEL: utesth_f16i32:
; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local i64, i64, v128
; CHECK-NEXT: # %bb.0: # %entry
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 3
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 2
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
@@ -284,55 +222,21 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
@@ -1742,62 +1646,9 @@ entry:
define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: .functype utest_f32i32_mm (v128) -> (v128)
-; CHECK-NEXT: .local i64, i64, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 0
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 1
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 3
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: f32x4.extract_lane 3
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 2
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -1859,16 +1710,7 @@ entry:
define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-LABEL: utesth_f16i32_mm:
; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local i64, i64, v128
; CHECK-NEXT: # %bb.0: # %entry
-; CHECK-NEXT: local.get 3
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 3
-; CHECK-NEXT: local.get 2
-; CHECK-NEXT: call __truncsfhf2
-; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: local.set 2
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
@@ -1876,55 +1718,21 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.const 4294967295, 4294967295
-; CHECK-NEXT: local.tee 6
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
; CHECK-NEXT: local.get 2
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 4
-; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
; CHECK-NEXT: local.get 3
-; CHECK-NEXT: i64.trunc_sat_f32_u
-; CHECK-NEXT: local.tee 5
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: local.get 6
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 4
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.splat
-; CHECK-NEXT: i64.const -1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 5
-; CHECK-NEXT: i64.const 4294967295
-; CHECK-NEXT: i64.lt_u
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i64x2.replace_lane 1
-; CHECK-NEXT: v128.bitselect
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
More information about the llvm-commits mailing list