[llvm] [RISCV] Use vnclip for scalable vector saturating truncation. (PR #88648)

Wed Apr 17 20:29:18 PDT 2024

https://github.com/sun-jacobi updated https://github.com/llvm/llvm-project/pull/88648

>From 84ba7cd035c5e294e956322e3316bc486ab011d5 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sun, 14 Apr 2024 16:55:28 +0900
Subject: [PATCH 1/4] [RISCV] add
 llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll

---
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        | 444 ++++++++++++++++++
 1 file changed, 444 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
new file mode 100644
index 00000000000000..1666272ce6abdb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+
+declare <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+declare <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+declare <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
+
+define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, -128
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -128))
+  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 127))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    li a0, -128
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
+  %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 -128))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, -127
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -127))
+  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 128))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, 255
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
+  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %3, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, 127
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 127))
+  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %3, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, 255
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
+  %3 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    li a0, 255
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i16>, ptr %x, align 16
+  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
+  %3 = tail call <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
+  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+
+define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32767))
+  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32768))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 -32768))
+  %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 32767))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
+  %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 -32768))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 8
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 32767))
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %3, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %3, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 0))
+  %3 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    lui a0, 16
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vs1r.v v10, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i32>, ptr %x, align 32
+  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
+  %3 = tail call <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 0))
+  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+
+define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a0, a0, 1
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    slli a0, a0, 31
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483647))
+  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483648))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 32
+  ret void
+}
+
+define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vmin.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 -2147483648))
+  %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 2147483647))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 32
+  ret void
+}
+
+define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a2, a0, -1
+; CHECK-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vmin.vx v8, v8, a2
+; CHECK-NEXT:    vmax.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
+  %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 -2147483648))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 32
+  ret void
+}
+
+
+define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_notopt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    addiw a0, a0, -1
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 2147483647))
+  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %3, ptr %y, align 32
+  ret void
+}
+
+define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
+  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %3, ptr %y, align 32
+  ret void
+}
+
+
+define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_maxmin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
+  %3 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 32
+  ret void
+}
+
+define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_minmax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re64.v v8, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vs2r.v v12, (a1)
+; CHECK-NEXT:    ret
+  %1 = load <vscale x 4 x i64>, ptr %x, align 64
+  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
+  %3 = tail call <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
+  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %4, ptr %y, align 32
+  ret void
+}

>From 4d576238e3d71416c33ac6dca0b6694c72781afe Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sun, 14 Apr 2024 17:09:53 +0900
Subject: [PATCH 2/4] [RISCV] Use vnclip for scalable vector saturating
 truncation.

---
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td |  50 +++++++
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |  32 +++--
 ...lip.ll => fixed-vectors-trunc-sat-clip.ll} |   0
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        | 125 +++++-------------
 4 files changed, 95 insertions(+), 112 deletions(-)
 rename llvm/test/CodeGen/RISCV/rvv/{trunc-sat-clip.ll => fixed-vectors-trunc-sat-clip.ll} (100%)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 82ee4b0cbce903..b6f82ad1a1ef94 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1166,6 +1166,56 @@ defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
 defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
 defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
 
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+class VPatTruncSatClipMaxMinSDNodeBase<VTypeInfo vti,
+                                       VTypeInfo wti,
+                                       SDPatternOperator op1,
+                                       int op1_value,
+                                       SDPatternOperator op2,
+                                       int op2_value> :
+  Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (op1
+          (wti.Vector (op2 (wti.Vector wti.RegClass:$rs1),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))))),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))))),
+        (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+        (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+class VPatTruncSatClipUMinSDNode<VTypeInfo vti,
+                                 VTypeInfo wti,
+                                 int uminval> :
+  Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (umin (wti.Vector wti.RegClass:$rs1),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))))), (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+        (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+multiclass VPatTruncSatClipMaxMinSDNode<VTypeInfo vti, VTypeInfo wti,
+  SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
+    def : VPatTruncSatClipMaxMinSDNodeBase<vti, wti, max, maxval, min, minval>;
+    def : VPatTruncSatClipMaxMinSDNodeBase<vti, wti, min, minval, max, maxval>;
+}
+
+multiclass VPatTruncSatClipSDNode<VTypeInfo vti, VTypeInfo wti> {
+  defvar sew = vti.SEW;
+  defvar uminval = !sub(!shl(1, sew), 1);
+  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in {
+    defm : VPatTruncSatClipMaxMinSDNode<vti, wti, smin, sminval, smax, smaxval>;
+    def : VPatTruncSatClipUMinSDNode<vti, wti, uminval>;
+  }
+
+}
+
+foreach vtiToWti = AllWidenableIntVectors in
+  defm : VPatTruncSatClipSDNode<vtiToWti.Vti, vtiToWti.Wti>;
+
 // 15. Vector Mask Instructions
 
 // 15.1. Vector Mask-Register Logical Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f2d97ba1472543..c3534392a5b5a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2373,13 +2373,12 @@ defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
 defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
 
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
-class VPatTruncSatClipMaxMinBase<string inst,
-                                 VTypeInfo vti,
-                                 VTypeInfo wti,
-                                 SDPatternOperator op1,
-                                 int op1_value,
-                                 SDPatternOperator op2,
-                                 int op2_value> :
+class VPatTruncSatClipMaxMinVLBase<VTypeInfo vti,
+                                   VTypeInfo wti,
+                                   SDPatternOperator op1,
+                                   int op1_value,
+                                   SDPatternOperator op2,
+                                   int op2_value> :
   Pat<(vti.Vector (riscv_trunc_vector_vl
         (wti.Vector (op1
           (wti.Vector (op2
@@ -2389,11 +2388,11 @@ class VPatTruncSatClipMaxMinBase<string inst,
           (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
           (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
         (vti.Mask V0), VLOpFrag)),
-      (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
+      (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
 
-class VPatTruncSatClipUMin<VTypeInfo vti,
+class VPatTruncSatClipUMinVL<VTypeInfo vti,
                            VTypeInfo wti,
                            int uminval> :
   Pat<(vti.Vector (riscv_trunc_vector_vl
@@ -2406,13 +2405,13 @@ class VPatTruncSatClipUMin<VTypeInfo vti,
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
 
-multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
+multiclass VPatTruncSatClipMaxMinVL<VTypeInfo vti, VTypeInfo wti,
   SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
-    def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
-    def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
+    def : VPatTruncSatClipMaxMinVLBase<vti, wti, max, maxval, min, minval>;
+    def : VPatTruncSatClipMaxMinVLBase<vti, wti, min, minval, max, maxval>;
 }
 
-multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
+multiclass VPatTruncSatClipVL<VTypeInfo vti, VTypeInfo wti> {
   defvar sew = vti.SEW;
   defvar uminval = !sub(!shl(1, sew), 1);
   defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
@@ -2420,15 +2419,14 @@ multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
 
   let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
                                GetVTypePredicates<wti>.Predicates) in {
-    defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
-                                  sminval, riscv_smax_vl, smaxval>;
-    def : VPatTruncSatClipUMin<vti, wti, uminval>;
+    defm : VPatTruncSatClipMaxMinVL<vti, wti, riscv_smin_vl, sminval, riscv_smax_vl, smaxval>;
+    def : VPatTruncSatClipUMinVL<vti, wti, uminval>;
   }
 
 }
 
 foreach vtiToWti = AllWidenableIntVectors in
-  defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>;
+  defm : VPatTruncSatClipVL<vtiToWti.Vti, vtiToWti.Wti>;
 
 // 13. Vector Floating-Point Instructions
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
similarity index 100%
rename from llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
rename to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 1666272ce6abdb..fae6abeed1b538 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -19,13 +19,8 @@ define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i8i16_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    li a0, -128
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vmin.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -40,13 +35,8 @@ define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i8i16_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    li a0, 127
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmin.vx v8, v8, a0
-; CHECK-NEXT:    li a0, -128
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnclip.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -82,11 +72,8 @@ define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_min:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    li a0, 255
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -118,11 +105,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    li a0, 255
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -137,11 +121,8 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    li a0, 255
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -179,14 +160,8 @@ define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i16i32_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    lui a0, 1048568
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmin.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclip.wi v10, v8, 0
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
@@ -201,14 +176,8 @@ define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i16i32_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmin.vx v8, v8, a0
-; CHECK-NEXT:    lui a0, 1048568
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclip.wi v10, v8, 0
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
@@ -242,12 +211,8 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u16u32_min:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    lui a0, 16
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
@@ -261,12 +226,8 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u16u32_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    lui a0, 16
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
@@ -281,12 +242,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u16u32_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    lui a0, 16
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
@@ -325,13 +282,8 @@ define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i32i64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    addiw a0, a0, -1
-; CHECK-NEXT:    vmin.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclip.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
@@ -346,13 +298,8 @@ define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_i32i64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:    addiw a2, a0, -1
-; CHECK-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmin.vx v8, v8, a2
-; CHECK-NEXT:    vmax.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclip.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
@@ -387,12 +334,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_min:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    srli a0, a0, 32
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
@@ -407,12 +350,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    srli a0, a0, 32
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
@@ -427,12 +366,8 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    srli a0, a0, 32
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vminu.vx v8, v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64

>From 799215ff1b1c39d2b287f193819fe867bed5b09d Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 15 Apr 2024 22:33:37 +0900
Subject: [PATCH 3/4] [RISCV] refactor VPatTruncSatClipVL and
 VPatTruncSatClipSDNode.

---
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 57 ++++++++----------
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 60 +++++++++----------
 2 files changed, 51 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b6f82ad1a1ef94..b4af83a3cbf671 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1167,50 +1167,41 @@ defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
 defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
 
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
-class VPatTruncSatClipMaxMinSDNodeBase<VTypeInfo vti,
-                                       VTypeInfo wti,
-                                       SDPatternOperator op1,
-                                       int op1_value,
-                                       SDPatternOperator op2,
-                                       int op2_value> :
-  Pat<(vti.Vector (riscv_trunc_vector_vl
-        (wti.Vector (op1
-          (wti.Vector (op2 (wti.Vector wti.RegClass:$rs1),
-            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))))),
-          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))))),
+multiclass VPatTruncSatClipSDNode<VTypeInfo vti, VTypeInfo wti> {
+  defvar sew = vti.SEW;
+  defvar uminval = !sub(!shl(1, sew), 1);
+  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in {
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (smin
+          (wti.Vector (smax (wti.Vector wti.RegClass:$rs1),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))))),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))))),
+        (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+        (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (smax
+          (wti.Vector (smin (wti.Vector wti.RegClass:$rs1),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))))),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))))),
         (vti.Mask V0), VLOpFrag)),
       (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
 
-class VPatTruncSatClipUMinSDNode<VTypeInfo vti,
-                                 VTypeInfo wti,
-                                 int uminval> :
-  Pat<(vti.Vector (riscv_trunc_vector_vl
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
         (wti.Vector (umin (wti.Vector wti.RegClass:$rs1),
           (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))))), (vti.Mask V0), VLOpFrag)),
       (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
-
-multiclass VPatTruncSatClipMaxMinSDNode<VTypeInfo vti, VTypeInfo wti,
-  SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
-    def : VPatTruncSatClipMaxMinSDNodeBase<vti, wti, max, maxval, min, minval>;
-    def : VPatTruncSatClipMaxMinSDNodeBase<vti, wti, min, minval, max, maxval>;
-}
-
-multiclass VPatTruncSatClipSDNode<VTypeInfo vti, VTypeInfo wti> {
-  defvar sew = vti.SEW;
-  defvar uminval = !sub(!shl(1, sew), 1);
-  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
-  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
-
-  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
-                               GetVTypePredicates<wti>.Predicates) in {
-    defm : VPatTruncSatClipMaxMinSDNode<vti, wti, smin, sminval, smax, smaxval>;
-    def : VPatTruncSatClipUMinSDNode<vti, wti, uminval>;
   }
-
 }
 
 foreach vtiToWti = AllWidenableIntVectors in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c3534392a5b5a3..6c6ecb604fd034 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2373,29 +2373,41 @@ defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
 defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
 
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
-class VPatTruncSatClipMaxMinVLBase<VTypeInfo vti,
-                                   VTypeInfo wti,
-                                   SDPatternOperator op1,
-                                   int op1_value,
-                                   SDPatternOperator op2,
-                                   int op2_value> :
-  Pat<(vti.Vector (riscv_trunc_vector_vl
-        (wti.Vector (op1
-          (wti.Vector (op2
+multiclass VPatTruncSatClipVL<VTypeInfo vti, VTypeInfo wti> {
+  defvar sew = vti.SEW;
+  defvar uminval = !sub(!shl(1, sew), 1);
+  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in {
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (riscv_smin_vl
+          (wti.Vector (riscv_smax_vl
+            (wti.Vector wti.RegClass:$rs1),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))),
+            (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
+          (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+        (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+        (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
+        (wti.Vector (riscv_smax_vl
+          (wti.Vector (riscv_smin_vl
             (wti.Vector wti.RegClass:$rs1),
-            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
             (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
-          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
+          (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), smaxval, (XLenVT srcvalue))),
           (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
         (vti.Mask V0), VLOpFrag)),
       (!cast<Instruction>("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK")
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
 
-class VPatTruncSatClipUMinVL<VTypeInfo vti,
-                           VTypeInfo wti,
-                           int uminval> :
-  Pat<(vti.Vector (riscv_trunc_vector_vl
+    def : Pat<(vti.Vector (riscv_trunc_vector_vl
         (wti.Vector (riscv_umin_vl
           (wti.Vector wti.RegClass:$rs1),
           (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
@@ -2404,25 +2416,7 @@ class VPatTruncSatClipUMinVL<VTypeInfo vti,
       (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
         (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
         (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
-
-multiclass VPatTruncSatClipMaxMinVL<VTypeInfo vti, VTypeInfo wti,
-  SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
-    def : VPatTruncSatClipMaxMinVLBase<vti, wti, max, maxval, min, minval>;
-    def : VPatTruncSatClipMaxMinVLBase<vti, wti, min, minval, max, maxval>;
-}
-
-multiclass VPatTruncSatClipVL<VTypeInfo vti, VTypeInfo wti> {
-  defvar sew = vti.SEW;
-  defvar uminval = !sub(!shl(1, sew), 1);
-  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
-  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
-
-  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
-                               GetVTypePredicates<wti>.Predicates) in {
-    defm : VPatTruncSatClipMaxMinVL<vti, wti, riscv_smin_vl, sminval, riscv_smax_vl, smaxval>;
-    def : VPatTruncSatClipUMinVL<vti, wti, uminval>;
   }
-
 }
 
 foreach vtiToWti = AllWidenableIntVectors in

>From c934041cc0a83f7d6f30b5a86ff1581770b76c22 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 15 Apr 2024 22:37:41 +0900
Subject: [PATCH 4/4] [RISCV] remove umax in trunc-sat-clip-sdnode.ll and
 fixed-vectors-trunc-sat-clip.ll.

---
 .../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 39 +++++++------------
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        | 39 +++++++------------
 2 files changed, 30 insertions(+), 48 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 42577408f71b00..a0d4718e9e851c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -8,11 +8,8 @@ declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
 declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
 declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
 
-declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
 declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
 declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
 
 define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
@@ -110,10 +107,9 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i16>, ptr %x, align 16
-  %2 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %1, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
-  %3 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %4 = trunc <4 x i16> %3 to <4 x i8>
-  store <4 x i8> %4, ptr %y, align 8
+  %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+  %3 = trunc <4 x i16> %2 to <4 x i8>
+  store <4 x i8> %3, ptr %y, align 8
   ret void
 }
 
@@ -127,9 +123,8 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <4 x i16>, ptr %x, align 16
   %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
-  %3 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %2, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
-  %4 = trunc <4 x i16> %3 to <4 x i8>
-  store <4 x i8> %4, ptr %y, align 8
+  %3 = trunc <4 x i16> %2 to <4 x i8>
+  store <4 x i8> %3, ptr %y, align 8
   ret void
 }
 
@@ -231,10 +226,9 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i32>, ptr %x, align 32
-  %2 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %3 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %4 = trunc <4 x i32> %3 to <4 x i16>
-  store <4 x i16> %4, ptr %y, align 16
+  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %3 = trunc <4 x i32> %2 to <4 x i16>
+  store <4 x i16> %3, ptr %y, align 16
   ret void
 }
 
@@ -248,9 +242,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <4 x i32>, ptr %x, align 32
   %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
-  %3 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
-  %4 = trunc <4 x i32> %3 to <4 x i16>
-  store <4 x i16> %4, ptr %y, align 16
+  %3 = trunc <4 x i32> %2 to <4 x i16>
+  store <4 x i16> %3, ptr %y, align 16
   ret void
 }
 
@@ -355,10 +348,9 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
-  %2 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %1, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
-  %3 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %4 = trunc <4 x i64> %3 to <4 x i32>
-  store <4 x i32> %4, ptr %y, align 32
+  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+  %3 = trunc <4 x i64> %2 to <4 x i32>
+  store <4 x i32> %3, ptr %y, align 32
   ret void
 }
 
@@ -372,8 +364,7 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <4 x i64>, ptr %x, align 64
   %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-  %3 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %2, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
-  %4 = trunc <4 x i64> %3 to <4 x i32>
-  store <4 x i32> %4, ptr %y, align 32
+  %3 = trunc <4 x i64> %2 to <4 x i32>
+  store <4 x i32> %3, ptr %y, align 32
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index fae6abeed1b538..7886eb162cd702 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -8,11 +8,8 @@ declare <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i3
 declare <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
 declare <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
 
-declare <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
 declare <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
 declare <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
 
 define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
@@ -110,10 +107,9 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
-  %2 = tail call <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
-  %3 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
-  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
-  store <vscale x 4 x i8> %4, ptr %y, align 8
+  %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
+  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %3, ptr %y, align 8
   ret void
 }
 
@@ -127,9 +123,8 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i16>, ptr %x, align 16
   %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
-  %3 = tail call <vscale x 4 x i16> @llvm.umax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
-  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
-  store <vscale x 4 x i8> %4, ptr %y, align 8
+  %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
+  store <vscale x 4 x i8> %3, ptr %y, align 8
   ret void
 }
 
@@ -231,10 +226,9 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vs1r.v v10, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
-  %2 = tail call <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 0))
-  %3 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
-  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
-  store <vscale x 4 x i16> %4, ptr %y, align 16
+  %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %3, ptr %y, align 16
   ret void
 }
 
@@ -248,9 +242,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i32>, ptr %x, align 32
   %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
-  %3 = tail call <vscale x 4 x i32> @llvm.umax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 0))
-  %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
-  store <vscale x 4 x i16> %4, ptr %y, align 16
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  store <vscale x 4 x i16> %3, ptr %y, align 16
   ret void
 }
 
@@ -355,10 +348,9 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
-  %2 = tail call <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
-  %3 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
-  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
-  store <vscale x 4 x i32> %4, ptr %y, align 32
+  %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
+  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %3, ptr %y, align 32
   ret void
 }
 
@@ -372,8 +364,7 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-NEXT:    ret
   %1 = load <vscale x 4 x i64>, ptr %x, align 64
   %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
-  %3 = tail call <vscale x 4 x i64> @llvm.umax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
-  %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
-  store <vscale x 4 x i32> %4, ptr %y, align 32
+  %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
+  store <vscale x 4 x i32> %3, ptr %y, align 32
   ret void
 }