[llvm] [RISCV] Don't support TRUNCATE_SSAT_U. (PR #104468)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 15 09:30:21 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/104468
RISC-V doesn't have an instruction for TRUNCATE_SSAT_U, which truncates a signed source with saturation to an unsigned range. We were treating it the same as TRUNCATE_USAT_U, whose source is already unsigned, so the lowering was wrong for negative inputs.
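To make the distinction concrete, here is a minimal scalar sketch of the i16-to-i8 case (illustrative C++, not code from this patch), assuming the standard ISD semantics: TRUNCATE_SSAT_U reads a signed source, TRUNCATE_USAT_U an unsigned one.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

// TRUNCATE_USAT_U: the source is unsigned; saturate to [0, 255].
// This is what a single vnclipu.wi computes per element.
uint8_t trunc_usat_u(uint16_t X) {
  return (uint8_t)std::min<uint16_t>(X, 255);
}

// TRUNCATE_SSAT_U: the source is signed; saturate to [0, 255].
// vnclipu alone is wrong here: a negative input reinterpreted as
// unsigned saturates to 255 instead of 0, which is why the updated
// codegen in the tests below emits a vmax with zero first.
uint8_t trunc_ssat_u(int16_t X) {
  return (uint8_t)std::clamp<int16_t>(X, 0, 255);
}

int main() {
  printf("%d %d\n", trunc_ssat_u(-1), trunc_usat_u(0xFFFF)); // 0 255
}
```

For -1 the two nodes must produce different results (0 vs 255), so they cannot share the plain vnclipu lowering.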
CC: @ParkHanbum
From 897eef0658cffabb0b9cf434a253def3fcd2694c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 15 Aug 2024 09:27:23 -0700
Subject: [PATCH] [RISCV] Don't support TRUNCATE_SSAT_U.
RISC-V doesn't have an instruction for this. We were treating it
the same as TRUNCATE_USAT_U.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 ++--
.../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 32 ++++++---
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 68 ++++++++++++-------
.../RISCV/rvv/trunc-sat-clip-sdnode.ll | 32 ++++++---
.../RISCV/rvv/trunc-select-to-max-usat.ll | 57 +++++++++++-----
5 files changed, 136 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e16dd132ed71bb..95bee98fe67fc8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -784,9 +784,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
- setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
- ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
- VT, Custom);
+ setOperationAction(
+ {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
+ Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -1101,9 +1101,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
- ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
- VT, Custom);
+ setOperationAction(
+ {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
+ Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
@@ -6230,7 +6230,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::TRUNCATE:
case ISD::TRUNCATE_SSAT_S:
- case ISD::TRUNCATE_SSAT_U:
case ISD::TRUNCATE_USAT_U:
// Only custom-lower vector truncates
if (!Op.getSimpleValueType().isVector())
@@ -8117,7 +8116,7 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
unsigned NewOpc;
if (Opc == ISD::TRUNCATE_SSAT_S)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
- else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
+ else if (Opc == ISD::TRUNCATE_USAT_U)
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
else
NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index e2f540e991fd0b..4e367bb0d70cd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -101,8 +101,10 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -117,8 +119,10 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -352,8 +356,10 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -368,8 +374,10 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
@@ -437,8 +445,10 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
@@ -455,8 +465,10 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
@@ -532,8 +544,10 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -552,8 +566,10 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 102ca143dfc737..1395dc914bb402 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -113,6 +113,7 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
@@ -303,6 +304,9 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
+; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-V-NEXT: vmax.vx v10, v10, zero
+; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -797,16 +801,17 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v10, a0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vslideup.vi v10, v8, 1
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vslideup.vi v8, v10, 2
+; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
@@ -939,8 +944,9 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1133,6 +1139,7 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
@@ -2107,23 +2114,24 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT: vmv.s.x v10, a0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vslideup.vi v10, v8, 1
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vslideup.vi v8, v10, 4
+; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
@@ -3465,6 +3473,7 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
@@ -3650,6 +3659,9 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
+; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-V-NEXT: vmax.vx v10, v10, zero
+; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
@@ -4139,16 +4151,17 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v10, a0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vslideup.vi v10, v8, 1
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vslideup.vi v8, v10, 2
+; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
@@ -4276,8 +4289,9 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-V-NEXT: vmax.vx v8, v9, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -4465,6 +4479,7 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
@@ -5434,23 +5449,24 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT: vmv.s.x v10, a0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vslideup.vi v10, v8, 1
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 2
+; CHECK-V-NEXT: vslideup.vi v8, v9, 2
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add a0, sp, a0
; CHECK-V-NEXT: addi a0, a0, 16
-; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT: vslideup.vi v10, v8, 4
+; CHECK-V-NEXT: vslideup.vi v8, v10, 4
+; CHECK-V-NEXT: vmax.vx v10, v8, zero
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index f43faadc532f26..01a90d8a33b6ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -102,7 +102,9 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -118,7 +120,9 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
@@ -353,7 +357,9 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
@@ -369,7 +375,9 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
@@ -438,7 +446,9 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -456,7 +466,9 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -533,7 +545,9 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v12, 0
@@ -553,7 +567,9 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v12, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
index 9c558d23e17e08..d2f73826e4e9e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
@@ -5,7 +5,9 @@
define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
; CHECK-LABEL: test_v4i16_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: ret
%a = icmp sgt <4 x i16> %x, zeroinitializer
@@ -19,7 +21,9 @@ define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
; CHECK-LABEL: test_v4i32_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
@@ -35,7 +39,9 @@ define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
; CHECK-LABEL: test_v4i64_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -53,7 +59,9 @@ define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
; CHECK-LABEL: test_v4i32_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: ret
%a = icmp sgt <4 x i32> %x, zeroinitializer
@@ -67,7 +75,9 @@ define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
; CHECK-LABEL: test_v4i64_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -83,9 +93,10 @@ define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
; CHECK-LABEL: test_v4i64_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmax.vx v10, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: ret
%a = icmp sgt <4 x i64> %x, zeroinitializer
%b = sext <4 x i1> %a to <4 x i64>
@@ -98,7 +109,9 @@ define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
define <vscale x 4 x i8> @test_nxv4i16_nxv4i8(<vscale x 4 x i16> %x) {
; CHECK-LABEL: test_nxv4i16_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: ret
%a = icmp sgt <vscale x 4 x i16> %x, zeroinitializer
@@ -112,7 +125,9 @@ define <vscale x 4 x i8> @test_nxv4i16_nxv4i8(<vscale x 4 x i16> %x) {
define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
; CHECK-LABEL: test_nxv4i32_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v10, 0
@@ -128,7 +143,9 @@ define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
; CHECK-LABEL: test_nxv4i64_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v12, 0
@@ -146,9 +163,10 @@ define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
; CHECK-LABEL: test_nxv4i32_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v10, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: ret
%a = icmp sgt <vscale x 4 x i32> %x, zeroinitializer
%b = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
@@ -161,7 +179,9 @@ define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
; CHECK-LABEL: test_nxv4i64_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vnclipu.wi v12, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vnclipu.wi v8, v12, 0
@@ -177,9 +197,10 @@ define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
define <vscale x 4 x i32> @test_nxv4i64_nxv4i32(<vscale x 4 x i64> %x) {
; CHECK-LABEL: test_nxv4i64_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnclipu.wi v12, v8, 0
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v12, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnclipu.wi v8, v12, 0
; CHECK-NEXT: ret
%a = icmp sgt <vscale x 4 x i64> %x, zeroinitializer
%b = sext <vscale x 4 x i1> %a to <vscale x 4 x i64>