[llvm] [RISCV][ISel] Remove redundant min/max in saturating truncation (PR #75145)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 28 05:30:10 PST 2023
https://github.com/sun-jacobi updated https://github.com/llvm/llvm-project/pull/75145
From ce8e9f490384dde255e49d329f8c89765efbd73f Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Tue, 12 Dec 2023 16:24:25 +0900
Subject: [PATCH 01/10] [RISCV][ISel] remove redundant min/max followed by a
trunc.
Fixes #73424.
If the range established by a min/max pair is precisely the value range of the
truncation target, the min/max can be folded away.
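A minimal sketch of the targeted shape (illustrative IR mirroring the
trunc_sat_i16i32_maxmin test added later in this series; the function name is
hypothetical):

    declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
    declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)

    define <4 x i16> @trunc_sat(<4 x i32> %x) {
      ; clamp %x to [-32768, 32767], exactly the value range of i16
      %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
      %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
      ; the clamp and the truncation together form a saturating truncate
      %t = trunc <4 x i32> %b to <4 x i16>
      ret <4 x i16> %t
    }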
---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 54 +++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index dc6b57fad32105..91eb9d775682c2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1618,6 +1618,60 @@ multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instr
}
}
+
+multiclass VPatTruncSplatMaxMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
+ SDPatternOperator vop1, int vid1, SDPatternOperator vop2, int vid2> {
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (vop1
+ (wti.Vector (vop2
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid2, (XLenVT srcvalue))),
+ (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid1, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+multiclass VPatTruncSplatMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
+ SDPatternOperator vop, int vid> {
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (vop
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+
+multiclass VPatTruncSplatMaxMinIdentity<VTypeInfo vti, VTypeInfo wti> {
+ defvar sew = vti.SEW;
+ defvar umin_id = !sub(!shl(1, sew), 1);
+ defvar umax_id = 0;
+ defvar smin_id = !sub(!shl(1, !sub(sew, 1)), 1);
+ defvar smax_id = !sub(0, !shl(1, !sub(sew, 1)));
+
+ defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umax_vl, umax_id, riscv_umin_vl, umin_id>;
+ defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umin_vl, umin_id, riscv_umax_vl, umax_id>;
+ defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smin_vl, smin_id, riscv_smax_vl, smax_id>;
+ defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smax_vl, smax_id, riscv_smin_vl, smin_id>;
+
+ defm : VPatTruncSplatMinIdentityBase<vti, wti, riscv_umin_vl, umin_id>;
+
+}
+
+foreach vtiToWti = AllWidenableIntVectors in
+ defm : VPatTruncSplatMaxMinIdentity<vtiToWti.Vti, vtiToWti.Wti>;
+
multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
From 904dc6d5edac181ba43acacd30c88313012be9c4 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sat, 16 Dec 2023 15:19:29 +0900
Subject: [PATCH 02/10] [RISCV][ISel] use vnclip for saturating truncation.
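vnclip.wi / vnclipu.wi narrow a 2*SEW source to SEW with signed/unsigned
saturation, so the clamp is folded into the instruction itself rather than
matched and discarded. A sketch of the intended lowering with shift amount 0
(register choices illustrative, taken from the test updates later in this
series):

    csrwi vxrm, 0          # fixed-point rounding mode must be defined; with a
                           # shift amount of 0 no bits are actually rounded
    vnclip.wi v8, v8, 0    # narrowing clip with signed saturation, replacing
                           # the earlier vmin.vx + vmax.vx + vnsrl.wi sequence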
---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 111 +++++++++---------
1 file changed, 57 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 91eb9d775682c2..66db75f071cc0c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1618,60 +1618,6 @@ multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instr
}
}
-
-multiclass VPatTruncSplatMaxMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
- SDPatternOperator vop1, int vid1, SDPatternOperator vop2, int vid2> {
- let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (vop1
- (wti.Vector (vop2
- (wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid2, (XLenVT srcvalue))),
- (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid1, (XLenVT srcvalue))),
- (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
- (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
- (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
-}
-
-multiclass VPatTruncSplatMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
- SDPatternOperator vop, int vid> {
- let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (vop
- (wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid, (XLenVT srcvalue))),
- (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
- (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
- (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
-}
-
-
-multiclass VPatTruncSplatMaxMinIdentity<VTypeInfo vti, VTypeInfo wti> {
- defvar sew = vti.SEW;
- defvar umin_id = !sub(!shl(1, sew), 1);
- defvar umax_id = 0;
- defvar smin_id = !sub(!shl(1, !sub(sew, 1)), 1);
- defvar smax_id = !sub(0, !shl(1, !sub(sew, 1)));
-
- defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umax_vl, umax_id, riscv_umin_vl, umin_id>;
- defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umin_vl, umin_id, riscv_umax_vl, umax_id>;
- defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smin_vl, smin_id, riscv_smax_vl, smax_id>;
- defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smax_vl, smax_id, riscv_smin_vl, smin_id>;
-
- defm : VPatTruncSplatMinIdentityBase<vti, wti, riscv_umin_vl, umin_id>;
-
-}
-
-foreach vtiToWti = AllWidenableIntVectors in
- defm : VPatTruncSplatMaxMinIdentity<vtiToWti.Vti, vtiToWti.Wti>;
-
multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
@@ -2382,6 +2328,63 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+multiclass VPatTruncSatClipMaxMinBase<string inst, VTypeInfo vti, VTypeInfo wti,
+ SDPatternOperator op1, int op1_value, SDPatternOperator op2, int op2_value> {
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op1
+ (wti.Vector (op2
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
+ (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)), (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int sminval> {
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (riscv_smin_vl
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+
+multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
+ SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
+ defm : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
+ defm : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
+}
+
+multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
+ defvar sew = vti.SEW;
+ defvar uminval = !sub(!shl(1, sew), 1);
+ defvar umaxval = 0;
+ defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+ defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_umax_vl,
+ umaxval, riscv_umin_vl, uminval>;
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_smin_vl,
+ sminval, riscv_smax_vl, smaxval>;
+
+ defm : VPatTruncSatClipUMin<vti, wti, sminval>;
+}
+
+foreach vtiToWti = AllWidenableIntVectors in
+ defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>;
+
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
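For reference, the clip bounds computed in VPatTruncSatClip above, worked
through for a narrow element width of sew = 8:

    uminval = (1 << 8) - 1 = 255     ; unsigned i8 maximum (UINT8_MAX)
    umaxval = 0                      ; unsigned i8 minimum
    sminval = (1 << 7) - 1 = 127     ; signed i8 maximum (INT8_MAX)
    smaxval = -(1 << 7)    = -128    ; signed i8 minimum (INT8_MIN)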
From be820a686bebcb308e22ce6a967c98ec33c294a0 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sat, 16 Dec 2023 15:36:26 +0900
Subject: [PATCH 03/10] [RISCV][ISel] fix (s|u)(max|min) value usage for
saturating truncation.
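The previous patch had the two clip pseudos swapped: the unsigned clamp must
lower to vnclipu and the signed clamp to vnclip, and VPatTruncSatClipUMin must
compare against the unsigned bound uminval rather than sminval. In short, the
intended mapping (bounds written for the narrow element type):

    signed:   smax(smin(x, INT_MAX), INT_MIN) + trunc  ->  vnclip.wi
    unsigned: umin(x, UINT_MAX)               + trunc  ->  vnclipu.wi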
---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 23 ++++++++++---------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 66db75f071cc0c..f489a8aa9118d9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2335,18 +2335,19 @@ multiclass VPatTruncSatClipMaxMinBase<string inst, VTypeInfo vti, VTypeInfo wti,
GetVTypePredicates<wti>.Predicates) in
def : Pat<(vti.Vector (riscv_trunc_vector_vl
(wti.Vector (op1
- (wti.Vector (op2
- (wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
- (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
- (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
- (vti.Mask V0), VLOpFrag)), (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
+ (wti.Vector (op2
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
+ (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
(vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
}
-multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int sminval> {
+multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int uminval> {
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in
def : Pat<(vti.Vector (riscv_trunc_vector_vl
@@ -2374,12 +2375,12 @@ multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
- defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_umax_vl,
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,
umaxval, riscv_umin_vl, uminval>;
- defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_smin_vl,
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
sminval, riscv_smax_vl, smaxval>;
- defm : VPatTruncSatClipUMin<vti, wti, sminval>;
+ defm : VPatTruncSatClipUMin<vti, wti, uminval>;
}
foreach vtiToWti = AllWidenableIntVectors in
From ac9b02331a37c4f13f8f6a6e9e1c3b5071f53c8c Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sat, 16 Dec 2023 15:40:15 +0900
Subject: [PATCH 04/10] [RISCV] fix uminval in VPatTruncSatClipUMin.
---
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f489a8aa9118d9..da292916750d2c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2353,7 +2353,7 @@ multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int uminval> {
def : Pat<(vti.Vector (riscv_trunc_vector_vl
(wti.Vector (riscv_smin_vl
(wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), sminval, (XLenVT srcvalue))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
(vti.Mask V0), VLOpFrag)),
(!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
From 96343d29d3dd880fe4b32c8b3ba7542af79be02f Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sat, 16 Dec 2023 17:15:35 +0900
Subject: [PATCH 05/10] [RISCV][ISel] update fpclamptosat_vec.ll
---
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 94 +++++--------------
1 file changed, 24 insertions(+), 70 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 7497051027fa37..3f9e5714eda27b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -39,12 +39,9 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a1
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -198,13 +195,8 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vmin.vx v8, v10, a1
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
-; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -510,12 +502,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v10, a1
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -925,13 +914,9 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v9, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -1087,13 +1072,9 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -1525,13 +1506,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v10, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
@@ -3385,12 +3362,9 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a1
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -3539,13 +3513,8 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vmin.vx v8, v10, a1
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
-; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i64>
@@ -3846,12 +3815,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
-; CHECK-V-NEXT: lui a0, 524288
-; CHECK-V-NEXT: addiw a1, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v10, a1
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -4256,13 +4222,9 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v9, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -4413,13 +4375,9 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v8, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -4846,13 +4804,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
-; CHECK-V-NEXT: lui a0, 8
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vmin.vx v8, v10, a0
-; CHECK-V-NEXT: lui a0, 1048568
-; CHECK-V-NEXT: vmax.vx v10, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclip.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
From 871733ed272d7e67031366a6a197c2399fac3b8b Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 18 Dec 2023 16:42:56 +0900
Subject: [PATCH 06/10] [RISCV] add trunc-sat-clip.ll test
---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 +-
llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll | 394 ++++++++++++++++++
2 files changed, 395 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index da292916750d2c..e92a1796eab47f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2351,7 +2351,7 @@ multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int uminval> {
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in
def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (riscv_smin_vl
+ (wti.Vector (riscv_umin_vl
(wti.Vector wti.RegClass:$rs1),
(wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
(wti.Vector undef), (wti.Mask V0), VLOpFrag)),
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
new file mode 100644
index 00000000000000..e12c9e515a9fd4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip.ll
@@ -0,0 +1,394 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+
+declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+
+define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
+ %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_i8i16_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
+ %3 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %2, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_i8i16_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i16_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: li a0, -127
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %1, <4 x i16> <i16 -127, i16 -127, i16 -127, i16 -127>)
+ %3 = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %2, <4 x i16> <i16 128, i16 128, i16 128, i16 128>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_u8u16_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+ %3 = trunc <4 x i16> %2 to <4 x i8>
+ store <4 x i8> %3, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
+ %3 = trunc <4 x i16> %2 to <4 x i8>
+ store <4 x i8> %3, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %1, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
+ %3 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %2, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
+ ret void
+}
+
+define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u16_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i16>, ptr %x, align 16
+ %2 = tail call <4 x i16> @llvm.umin.v4i16(<4 x i16> %1, <4 x i16> <i16 255, i16 255, i16 255, i16 255>)
+ %3 = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %2, <4 x i16> <i16 0, i16 0, i16 0, i16 0>)
+ %4 = trunc <4 x i16> %3 to <4 x i8>
+ store <4 x i8> %4, ptr %y, align 8
+ ret void
+}
+
+
+define void @trunc_sat_i16i32_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32767, i32 -32767, i32 -32767, i32 -32767>)
+ %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_i16i32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_i16i32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i32_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_u16u32_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %3 = trunc <4 x i32> %2 to <4 x i16>
+ store <4 x i16> %3, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %3 = trunc <4 x i32> %2 to <4 x i16>
+ store <4 x i16> %3, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %3 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %2, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 16
+ ret void
+}
+
+define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u32_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v8, v8, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i32>, ptr %x, align 32
+ %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %3 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %4 = trunc <4 x i32> %3 to <4 x i16>
+ store <4 x i16> %4, ptr %y, align 16
+ ret void
+}
+
+
+define void @trunc_sat_i32i64_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483647, i64 -2147483647, i64 -2147483647, i64 -2147483647>)
+ %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 32
+ ret void
+}
+
+define void @trunc_sat_i32i64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 32
+ ret void
+}
+
+define void @trunc_sat_i32i64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i32i64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclip.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 32
+ ret void
+}
+
+
+define void @trunc_sat_u32u64_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %3 = trunc <4 x i64> %2 to <4 x i32>
+ store <4 x i32> %3, ptr %y, align 32
+ ret void
+}
+
+define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %3 = trunc <4 x i64> %2 to <4 x i32>
+ store <4 x i32> %3, ptr %y, align 32
+ ret void
+}
+
+
+define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %1, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
+ %3 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %2, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 32
+ ret void
+}
+
+define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u32u64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vnclipu.wi v10, v8, 0
+; CHECK-NEXT: vse32.v v10, (a1)
+; CHECK-NEXT: ret
+ %1 = load <4 x i64>, ptr %x, align 64
+ %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %3 = tail call <4 x i64> @llvm.umax.v4i64(<4 x i64> %2, <4 x i64> <i64 0, i64 0, i64 0, i64 0>)
+ %4 = trunc <4 x i64> %3 to <4 x i32>
+ store <4 x i32> %4, ptr %y, align 32
+ ret void
+}
From 22162ffa565ec1be66bb0f10b36e0d8d62979f8c Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 18 Dec 2023 16:43:45 +0900
Subject: [PATCH 07/10] [RISCV] update fpclamptosat_vec.ll
---
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 76 ++++++-------------
1 file changed, 24 insertions(+), 52 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 3f9e5714eda27b..e1ebf2afda657e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -76,11 +76,9 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -249,12 +247,8 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
-; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -671,11 +665,9 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -951,11 +943,9 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v8, v9, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
@@ -1127,11 +1117,9 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
@@ -1785,11 +1773,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
@@ -3397,11 +3383,9 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i64>
@@ -3565,12 +3549,8 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
-; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -3982,11 +3962,9 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: addi a0, a0, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
-; CHECK-V-NEXT: li a0, -1
-; CHECK-V-NEXT: srli a0, a0, 32
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 2
; CHECK-V-NEXT: add sp, sp, a0
@@ -4257,11 +4235,9 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vfncvt.rtz.xu.f.w v9, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v8, v9, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v9, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
@@ -4428,11 +4404,9 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-V-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v8, v8, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v8, 0
; CHECK-V-NEXT: ret
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
@@ -5079,11 +5053,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 7
-; CHECK-V-NEXT: lui a0, 16
-; CHECK-V-NEXT: addi a0, a0, -1
-; CHECK-V-NEXT: vminu.vx v10, v10, a0
; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-V-NEXT: csrwi vxrm, 0
+; CHECK-V-NEXT: vnclipu.wi v8, v10, 0
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
; CHECK-V-NEXT: add sp, sp, a0
From 2c213e6093b3a72d9a2144614d8ff7fd7fc900b4 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Thu, 21 Dec 2023 14:36:26 +0900
Subject: [PATCH 08/10] [RISCV] fix indent for VPatTruncSatClipMaxMin
---
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index e92a1796eab47f..298e52368307af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2376,9 +2376,9 @@ multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,
- umaxval, riscv_umin_vl, uminval>;
+ umaxval, riscv_umin_vl, uminval>;
defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
- sminval, riscv_smax_vl, smaxval>;
+ sminval, riscv_smax_vl, smaxval>;
defm : VPatTruncSatClipUMin<vti, wti, uminval>;
}
From 839a0efdbb4eed47f585a5fb31832b1f1ca2425b Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Thu, 21 Dec 2023 14:47:14 +0900
Subject: [PATCH 09/10] [RISCV] remove VPatTruncSatClipMaxMin for unsigned
saturating truncation
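For unsigned values umax(x, 0) is the identity, so the umax/umin pair collapses
to a single umin against 2^sew - 1, and the dedicated unsigned max/min pattern
is redundant with VPatTruncSatClipUMin. Illustrative IR of the degenerate
bound:

    ; umax against a zero splat returns %x unchanged for any input,
    ; leaving only the umin clamp for the pattern to match
    %a = tail call <4 x i16> @llvm.umax.v4i16(<4 x i16> %x, <4 x i16> zeroinitializer)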
---
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 298e52368307af..e8d9a8999f71fd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2371,15 +2371,11 @@ multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
defvar sew = vti.SEW;
defvar uminval = !sub(!shl(1, sew), 1);
- defvar umaxval = 0;
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
- defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,
- umaxval, riscv_umin_vl, uminval>;
defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
sminval, riscv_smax_vl, smaxval>;
-
defm : VPatTruncSatClipUMin<vti, wti, uminval>;
}
From f4a3ee04f06e45f21fb704c3e2495c54f90dc0d0 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Thu, 28 Dec 2023 22:29:50 +0900
Subject: [PATCH 10/10] [RISCV] use class for VPatTruncSatClipMaxMinBase and
VPatTruncSatClipUMin
---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 78 ++++++++++---------
1 file changed, 41 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index e8d9a8999f71fd..9b4f3dd7ec10e1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2329,43 +2329,43 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
-multiclass VPatTruncSatClipMaxMinBase<string inst, VTypeInfo vti, VTypeInfo wti,
- SDPatternOperator op1, int op1_value, SDPatternOperator op2, int op2_value> {
- let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (op1
- (wti.Vector (op2
- (wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
- (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
- (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
- (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
- (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
- (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
-}
-
-multiclass VPatTruncSatClipUMin<VTypeInfo vti, VTypeInfo wti, int uminval> {
- let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (riscv_umin_vl
- (wti.Vector wti.RegClass:$rs1),
- (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
- (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
- (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
- (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
- (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
-}
-
+class VPatTruncSatClipMaxMinBase<string inst,
+ VTypeInfo vti,
+ VTypeInfo wti,
+ SDPatternOperator op1,
+ int op1_value,
+ SDPatternOperator op2,
+ int op2_value> :
+ Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op1
+ (wti.Vector (op2
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))),
+ (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+class VPatTruncSatClipUMin<VTypeInfo vti,
+ VTypeInfo wti,
+ int uminval> :
+ Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (riscv_umin_vl
+ (wti.Vector wti.RegClass:$rs1),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))),
+ (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+ (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>;
multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti,
SDPatternOperator max, int maxval, SDPatternOperator min, int minval> {
- defm : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
- defm : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
+ def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>;
+ def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>;
}
multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
@@ -2374,9 +2374,13 @@ multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> {
defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
- defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
- sminval, riscv_smax_vl, smaxval>;
- defm : VPatTruncSatClipUMin<vti, wti, uminval>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
+ sminval, riscv_smax_vl, smaxval>;
+ def : VPatTruncSatClipUMin<vti, wti, uminval>;
+ }
+
}
foreach vtiToWti = AllWidenableIntVectors in