[llvm] 8247068 - [RISCV] Support (truncate (smin (smax X, C1), C2)) for vnclipu in combineTruncToVnclip. (#93756)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 13:41:10 PDT 2024
Author: Craig Topper
Date: 2024-05-30T15:41:07-05:00
New Revision: 8247068b70106d188d037b9cc4b8de3548108597
URL: https://github.com/llvm/llvm-project/commit/8247068b70106d188d037b9cc4b8de3548108597
DIFF: https://github.com/llvm/llvm-project/commit/8247068b70106d188d037b9cc4b8de3548108597.diff
LOG: [RISCV] Support (truncate (smin (smax X, C1), C2)) for vnclipu in combineTruncToVnclip. (#93756)
If the smax removes all negative numbers, then we can treat the smin
like a umin.
If the smin and smax are in the other order, we can swap them and use a
vnclipu as long as the smax constant is non-negative and less than or
equal to the smin constant.
This is based on similar code from X86's detectUSatPattern.
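As an illustration (a sketch mirroring the trunc_sat_u8u16_maxmin test
updated below), consider an i16-to-i8 truncate:

  %a = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %x, <4 x i16> zeroinitializer)
  %b = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
  %c = trunc <4 x i16> %b to <4 x i8>

After the smax, every lane is non-negative, so the signed compare in the
smin against the i8 mask value 255 behaves exactly like an unsigned
compare. That lets the whole sequence select to vmax.vx followed by
vnclipu.wi instead of vmax.vx, vmin.vx, and vnsrl.wi. The minmax tests
cover the reversed order, where the smax is re-created above the smin so
the same vnclipu lowering applies.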
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0e7713509e969..1635b5c5805ad 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16230,17 +16230,37 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
return SDValue();
};
- auto DetectUSatPattern = [&](SDValue V) {
- // Src must be a UMIN or UMIN_VL.
- APInt C;
- SDValue UMin = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, C);
- if (!UMin)
- return SDValue();
+ SDLoc DL(N);
- if (!C.isMask(VT.getScalarSizeInBits()))
- return SDValue();
+ auto DetectUSatPattern = [&](SDValue V) {
+ APInt LoC, HiC;
+
+ // Simple case, V is a UMIN.
+ if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
+ if (HiC.isMask(VT.getScalarSizeInBits()))
+ return UMinOp;
+
+ // If we have an SMAX that removes negative numbers first, then we can match
+ // SMIN instead of UMIN.
+ if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
+ if (SDValue SMaxOp =
+ MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
+ if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
+ return SMinOp;
+
+ // If we have an SMIN before an SMAX and the SMAX constant is less than or
+ // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
+ // first.
+ if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
+ if (SDValue SMinOp =
+ MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
+ if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
+ HiC.uge(LoC))
+ return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
+ V.getOperand(1), DAG.getUNDEF(V.getValueType()),
+ Mask, VL);
- return UMin;
+ return SDValue();
};
auto DetectSSatPattern = [&](SDValue V) {
@@ -16249,16 +16269,18 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
- APInt CMin, CMax;
- if (SDValue SMin = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
- if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
- if (CMin == SignedMax && CMax == SignedMin)
- return SMax;
+ APInt HiC, LoC;
+ if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
+ if (SDValue SMaxOp =
+ MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
+ if (HiC == SignedMax && LoC == SignedMin)
+ return SMaxOp;
- if (SDValue SMax = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
- if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
- if (CMin == SignedMax && CMax == SignedMin)
- return SMin;
+ if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
+ if (SDValue SMinOp =
+ MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
+ if (HiC == SignedMax && LoC == SignedMin)
+ return SMinOp;
return SDValue();
};
@@ -16272,7 +16294,6 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
else
return SDValue();
- SDLoc DL(N);
// Rounding mode here is arbitrary since we aren't shifting out any bits.
return DAG.getNode(
ClipOpc, DL, VT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 9f82eddf432da..fa7de06a8c0d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -98,17 +98,14 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i16>, ptr %x, align 16
@@ -119,17 +116,14 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i16>, ptr %x, align 16
@@ -229,7 +223,6 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK: # %bb.0:
@@ -237,11 +230,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -252,19 +242,15 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, 50
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -367,18 +353,14 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -389,18 +371,14 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -477,7 +455,6 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
@@ -500,7 +477,6 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
@@ -595,7 +571,6 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
@@ -620,7 +595,6 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
@@ -735,7 +709,6 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK: # %bb.0:
@@ -760,7 +733,6 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index aa845bd8bb0b7..3e2db3fa4685d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -113,12 +113,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -307,13 +304,10 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vmin.vx v8, v10, a0
-; CHECK-V-NEXT: vmax.vx v10, v8, zero
+; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -817,12 +811,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -953,12 +944,9 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v9, a0
-; CHECK-V-NEXT: vmax.vx v8, v8, zero
+; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1151,12 +1139,9 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -2146,12 +2131,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -3491,12 +3473,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -3680,13 +3659,10 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vmin.vx v8, v10, a0
-; CHECK-V-NEXT: vmax.vx v10, v8, zero
+; CHECK-V-NEXT: vmax.vx v10, v10, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -4185,12 +4161,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 2
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -4316,12 +4289,9 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v9, a0
-; CHECK-V-NEXT: vmax.vx v8, v8, zero
+; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -4509,12 +4479,9 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -5499,12 +5466,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-V-NEXT: vslideup.vi v8, v10, 4
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 78e8f0fbbbdd7..cc78b6b2a1312 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -98,17 +98,14 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -119,17 +116,14 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -229,7 +223,6 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK: # %bb.0:
@@ -237,11 +230,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vs1r.v v10, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -252,19 +242,15 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vs1r.v v10, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -367,18 +353,14 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -389,18 +371,14 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: srli a0, a0, 32
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -477,7 +455,6 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
@@ -500,7 +477,6 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
@@ -595,7 +571,6 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
@@ -620,7 +595,6 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
@@ -714,7 +688,6 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK: # %bb.0:
@@ -739,7 +712,6 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
ret void
}
-; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK: # %bb.0: