[llvm] edf4e02 - [RISCV] Support multiple levels of truncates in combineTruncToVnclip. (#93752)
Author: Craig Topper
Date: 2024-05-31T09:09:12-05:00
New Revision: edf4e02906bd0c57087a44e5c4890ffc453667b2
URL: https://github.com/llvm/llvm-project/commit/edf4e02906bd0c57087a44e5c4890ffc453667b2
DIFF: https://github.com/llvm/llvm-project/commit/edf4e02906bd0c57087a44e5c4890ffc453667b2.diff
LOG: [RISCV] Support multiple levels of truncates in combineTruncToVnclip. (#93752)
We can use multiple vnclips to saturate an i32 value into an i8 value.
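For illustration, here is a minimal sketch of the kind of IR this combine now handles (the function name is hypothetical, modeled on the updated tests below, which also cover unsigned clamps and i64 sources): an smax/smin clamp of an i32 vector to the i8 range followed by a truncate.

define <4 x i8> @trunc_sat_i8i32_sketch(<4 x i32> %v) {
  ; Clamp to [-128, 127], then truncate i32 -> i8.
  %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %v, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %b = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %c = trunc <4 x i32> %b to <4 x i8>
  ret <4 x i8> %c
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)

The i32->i8 vector truncate is lowered as two RISCVISD::TRUNCATE_VECTOR_VL layers (i32->i16->i8); the combine now looks through those layers and emits one vnclip per halving step, so the clamp plus two vnsrl.wi truncates become a vnclip.wi at e16 followed by a vnclip.wi at e8, as the updated CHECK lines below show.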
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1635b5c5805ad..8ace5d79af079 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16285,21 +16285,37 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  };

+  SDValue Src = N->getOperand(0);
+
+  // Look through multiple layers of truncates.
+  while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
+         Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
+         Src.hasOneUse())
+    Src = Src.getOperand(0);
+
  SDValue Val;
  unsigned ClipOpc;
-  if ((Val = DetectUSatPattern(N->getOperand(0))))
+  if ((Val = DetectUSatPattern(Src)))
    ClipOpc = RISCVISD::VNCLIPU_VL;
-  else if ((Val = DetectSSatPattern(N->getOperand(0))))
+  else if ((Val = DetectSSatPattern(Src)))
    ClipOpc = RISCVISD::VNCLIP_VL;
  else
    return SDValue();

-  // Rounding mode here is arbitrary since we aren't shifting out any bits.
-  return DAG.getNode(
-      ClipOpc, DL, VT,
-      {Val, DAG.getConstant(0, DL, VT), DAG.getUNDEF(VT), Mask,
-       DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
-       VL});
+  MVT ValVT = Val.getSimpleValueType();
+
+  do {
+    MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
+    ValVT = ValVT.changeVectorElementType(ValEltVT);
+    // Rounding mode here is arbitrary since we aren't shifting out any bits.
+    Val = DAG.getNode(
+        ClipOpc, DL, ValVT,
+        {Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,
+         DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
+         VL});
+  } while (ValVT != VT);
+
+  return Val;
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index fa7de06a8c0d7..4e367bb0d70cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -392,16 +392,11 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -415,16 +410,11 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -438,14 +428,11 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -461,12 +448,10 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -482,13 +467,11 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
@@ -502,18 +485,13 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -527,18 +505,13 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -552,16 +525,13 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -577,14 +547,12 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -600,15 +568,13 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -622,17 +588,11 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
@@ -646,17 +606,11 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
@@ -691,15 +645,11 @@ define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
@@ -716,13 +666,10 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
@@ -738,15 +685,12 @@ define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, 50
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index cc78b6b2a1312..01a90d8a33b6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -393,15 +393,10 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -416,15 +411,10 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -439,13 +429,10 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -461,12 +448,10 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vl2re32.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -481,14 +466,12 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -503,17 +486,12 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -528,17 +506,12 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -553,15 +526,12 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -577,14 +547,12 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vl4re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -599,16 +567,14 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -623,16 +589,10 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -647,16 +607,10 @@ define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclip.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclip.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -671,14 +625,10 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 32
@@ -695,13 +645,10 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -716,16 +663,13 @@ define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: lui a0, 16
-; CHECK-NEXT: addiw a0, a0, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i64>, ptr %x, align 16