[llvm] [RISCV] Replace duplicate trunc-sat-clip tests with more interesting tests. NFC (PR #93737)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 29 13:42:27 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/93737
For each pair of types, we had 3 identical tests using umin with the unsigned max value.
This patch replaces two of them with smin+smax cases that can be implemented with a signed vmax followed by a vnclipu.
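For reference, a minimal sketch of the new pattern (taken from the updated u8u16 maxmin test) and the lowering the FIXME comments in the patch anticipate; the vmax+vnclipu sequence is the hoped-for codegen, not what the checks currently expect:

  define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
    %v = load <4 x i16>, ptr %x, align 16
    ; clamp the low end to 0 with a signed max ...
    %lo = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %v, <4 x i16> zeroinitializer)
    ; ... then clamp the high end to the unsigned i8 max
    %hi = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %lo, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
    %t = trunc <4 x i16> %hi to <4 x i8>
    store <4 x i8> %t, ptr %y, align 8
    ret void
  }

Once the value is known non-negative from the smax, the smin+trunc should fold into an unsigned saturating narrow, roughly:

    vmax.vx    v8, v8, zero    # signed clamp to >= 0
    vnclipu.wi v8, v8, 0       # unsigned saturating narrow caps at 255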
From 6b35106ac7966cf1ab2f64cbd762998e5c81a78f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 29 May 2024 13:40:18 -0700
Subject: [PATCH] [RISCV] Replace duplicate trunc-sat-clip tests with more
interesting tests. NFC
For each pair of types, we had 3 identical tests using umin with
the unsigned max value.
This patch replaces two of them with smin+smax cases that can be
implemented with a signed vmax followed by a vnclipu.
---
.../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 118 ++++++++++++------
.../RISCV/rvv/trunc-sat-clip-sdnode.ll | 118 ++++++++++++------
2 files changed, 160 insertions(+), 76 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index a0d4718e9e851..414b23ffb582a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -98,33 +98,45 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i16>, ptr %x, align 16
- %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
- %3 = trunc <4 x i16> %2 to <4 x i8>
- store <4 x i8> %3, ptr %y, align 8
+ %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> zeroinitializer)
+ %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i16>, ptr %x, align 16
- %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
- %3 = trunc <4 x i16> %2 to <4 x i8>
- store <4 x i8> %3, ptr %y, align 8
+ %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+ %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> zeroinitializer)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
ret void
}
@@ -217,33 +229,49 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
ret void
}
-define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
- %1 = load <4 x i32>, ptr %x, align 32
- %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
- %3 = trunc <4 x i32> %2 to <4 x i16>
- store <4 x i16> %3, ptr %y, align 16
+ %1 = load <4 x i32>, ptr %x, align 16
+ %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 8
ret void
}
-define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
- %1 = load <4 x i32>, ptr %x, align 32
- %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
- %3 = trunc <4 x i32> %2 to <4 x i16>
- store <4 x i16> %3, ptr %y, align 16
+ %1 = load <4 x i32>, ptr %x, align 16
+ %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 50, i32 50, i32 50, i32 50>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 8
ret void
}
@@ -339,32 +367,46 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
- %1 = load <4 x i64>, ptr %x, align 64
- %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
- %3 = trunc <4 x i64> %2 to <4 x i32>
- store <4 x i32> %3, ptr %y, align 32
+ %1 = load <4 x i64>, ptr %x, align 16
+ %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
+ %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 8
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: ret
- %1 = load <4 x i64>, ptr %x, align 64
- %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
- %3 = trunc <4 x i64> %2 to <4 x i32>
- store <4 x i32> %3, ptr %y, align 32
+ %1 = load <4 x i64>, ptr %x, align 16
+ %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 8
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 7886eb162cd70..fcb49c2187191 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -98,33 +98,45 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i16>, ptr %x, align 16
- %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
- %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
- store <vscale x 4 x i8> %3, ptr %y, align 8
+ %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 0))
+ %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 255))
+ %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+ store <vscale x 4 x i8> %4, ptr %y, align 8
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u16_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <vscale x 4 x i16>, ptr %x, align 16
- %2 = tail call <vscale x 4 x i16> @llvm.umin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
- %3 = trunc <vscale x 4 x i16> %2 to <vscale x 4 x i8>
- store <vscale x 4 x i8> %3, ptr %y, align 8
+ %2 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 255))
+ %3 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 0))
+ %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
+ store <vscale x 4 x i8> %4, ptr %y, align 8
ret void
}
@@ -217,33 +229,49 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
ret void
}
-define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vs1r.v v10, (a1)
; CHECK-NEXT: ret
- %1 = load <vscale x 4 x i32>, ptr %x, align 32
- %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
- %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
- store <vscale x 4 x i16> %3, ptr %y, align 16
+ %1 = load <vscale x 4 x i32>, ptr %x, align 16
+ %2 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 1))
+ %3 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 65535))
+ %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+ store <vscale x 4 x i16> %4, ptr %y, align 8
ret void
}
-define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
-; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl2re32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vs1r.v v10, (a1)
; CHECK-NEXT: ret
- %1 = load <vscale x 4 x i32>, ptr %x, align 32
- %2 = tail call <vscale x 4 x i32> @llvm.umin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
- %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
- store <vscale x 4 x i16> %3, ptr %y, align 16
+ %1 = load <vscale x 4 x i32>, ptr %x, align 16
+ %2 = tail call <vscale x 4 x i32> @llvm.smin.v4i32(<vscale x 4 x i32> %1, <vscale x 4 x i32> splat (i32 65535))
+ %3 = tail call <vscale x 4 x i32> @llvm.smax.v4i32(<vscale x 4 x i32> %2, <vscale x 4 x i32> splat (i32 50))
+ %4 = trunc <vscale x 4 x i32> %3 to <vscale x 4 x i16>
+ store <vscale x 4 x i16> %4, ptr %y, align 8
ret void
}
@@ -339,32 +367,46 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnclipu.wi v12, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
- %1 = load <vscale x 4 x i64>, ptr %x, align 64
- %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
- %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
- store <vscale x 4 x i32> %3, ptr %y, align 32
+ %1 = load <vscale x 4 x i64>, ptr %x, align 16
+ %2 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 0))
+ %3 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 4294967295))
+ %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+ store <vscale x 4 x i32> %4, ptr %y, align 8
ret void
}
+; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u32u64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vl4re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnclipu.wi v12, v8, 0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
; CHECK-NEXT: vs2r.v v12, (a1)
; CHECK-NEXT: ret
- %1 = load <vscale x 4 x i64>, ptr %x, align 64
- %2 = tail call <vscale x 4 x i64> @llvm.umin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
- %3 = trunc <vscale x 4 x i64> %2 to <vscale x 4 x i32>
- store <vscale x 4 x i32> %3, ptr %y, align 32
+ %1 = load <vscale x 4 x i64>, ptr %x, align 16
+ %2 = tail call <vscale x 4 x i64> @llvm.smin.v4i64(<vscale x 4 x i64> %1, <vscale x 4 x i64> splat (i64 4294967295))
+ %3 = tail call <vscale x 4 x i64> @llvm.smax.v4i64(<vscale x 4 x i64> %2, <vscale x 4 x i64> splat (i64 0))
+ %4 = trunc <vscale x 4 x i64> %3 to <vscale x 4 x i32>
+ store <vscale x 4 x i32> %4, ptr %y, align 8
ret void
}