[llvm] [RISCV] Combine trunc (sra sext (x), zext (y)) to sra (x, smin (y, scalarsizeinbits(y) - 1)) (PR #65728)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 15 10:45:38 PDT 2023
https://github.com/LWenH updated https://github.com/llvm/llvm-project/pull/65728
From 2f3848423f2fb8561fe5a62e523971c239adafdb Mon Sep 17 00:00:00 2001
From: LWenH <924105575 at qq.com>
Date: Tue, 5 Sep 2023 19:58:13 +0800
Subject: [PATCH 1/3] [RISCV] Add pre-commit test for later
trunc(sra(sext,zext)) combine, NFC.
Add a series of pre-commit tests for a later patch that performs the
trunc (sra sext(X), zext(Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
combine.
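Such IR typically arises from C's integer promotion rules; a minimal
illustration (the snippet and the function name shift_i8 are
illustrative, not taken from the PR):

  #include <cstdint>

  int8_t shift_i8(int8_t x, uint8_t y) {
    // x promotes to int by sign extension and y by zero extension, so
    // the shift happens in i32 and the result is truncated back to i8
    // (amounts >= 32 would be undefined, mirroring poison in IR).
    return static_cast<int8_t>(x >> y);
  }

Vectorizing this element-wise yields the sext/zext/ashr/trunc chains
exercised by the tests below.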
---
llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll | 173 +++++++++++++++++++++
1 file changed, 173 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 743031616967754..2a40b59736c297b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -12,6 +12,25 @@ define <vscale x 1 x i8> @vsra_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8
ret <vscale x 1 x i8> %vc
}
+define <vscale x 1 x i8> @vsra_vv_nxv1i8_sext_zext(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vsra_vv_nxv1i8_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsext.vf4 v9, v8
+; CHECK-NEXT: vzext.vf4 v10, v8
+; CHECK-NEXT: vsra.vv v8, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 1 x i8> %va to <vscale x 1 x i32>
+ %zexted_vb = zext <vscale x 1 x i8> %va to <vscale x 1 x i32>
+ %expand = ashr <vscale x 1 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 1 x i32> %expand to <vscale x 1 x i8>
+ ret <vscale x 1 x i8> %vc
+}
+
define <vscale x 1 x i8> @vsra_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vsra_vx_nxv1i8:
; CHECK: # %bb.0:
@@ -46,6 +65,25 @@ define <vscale x 2 x i8> @vsra_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8
ret <vscale x 2 x i8> %vc
}
+define <vscale x 2 x i8> @vsra_vv_nxv2i8_sext_zext(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vsra_vv_nxv2i8_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsext.vf4 v9, v8
+; CHECK-NEXT: vzext.vf4 v10, v8
+; CHECK-NEXT: vsra.vv v8, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 2 x i8> %va to <vscale x 2 x i32>
+ %zexted_vb = zext <vscale x 2 x i8> %va to <vscale x 2 x i32>
+ %expand = ashr <vscale x 2 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 2 x i32> %expand to <vscale x 2 x i8>
+ ret <vscale x 2 x i8> %vc
+}
+
define <vscale x 2 x i8> @vsra_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vsra_vx_nxv2i8:
; CHECK: # %bb.0:
@@ -80,6 +118,25 @@ define <vscale x 4 x i8> @vsra_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8
ret <vscale x 4 x i8> %vc
}
+define <vscale x 4 x i8> @vsra_vv_nxv4i8_sext_zext(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vsra_vv_nxv4i8_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsext.vf4 v10, v8
+; CHECK-NEXT: vzext.vf4 v12, v8
+; CHECK-NEXT: vsra.vv v8, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 4 x i8> %va to <vscale x 4 x i32>
+ %zexted_vb = zext <vscale x 4 x i8> %va to <vscale x 4 x i32>
+ %expand = ashr <vscale x 4 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 4 x i32> %expand to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %vc
+}
+
define <vscale x 4 x i8> @vsra_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vsra_vx_nxv4i8:
; CHECK: # %bb.0:
@@ -114,6 +171,25 @@ define <vscale x 8 x i8> @vsra_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8
ret <vscale x 8 x i8> %vc
}
+define <vscale x 8 x i8> @vsra_vv_nxv8i8_sext_zext(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vsra_vv_nxv8i8_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsext.vf4 v12, v8
+; CHECK-NEXT: vzext.vf4 v16, v8
+; CHECK-NEXT: vsra.vv v8, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 8 x i8> %va to <vscale x 8 x i32>
+ %zexted_vb = zext <vscale x 8 x i8> %va to <vscale x 8 x i32>
+ %expand = ashr <vscale x 8 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 8 x i32> %expand to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %vc
+}
+
define <vscale x 8 x i8> @vsra_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vsra_vx_nxv8i8:
; CHECK: # %bb.0:
@@ -148,6 +224,25 @@ define <vscale x 16 x i8> @vsra_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
ret <vscale x 16 x i8> %vc
}
+define <vscale x 16 x i8> @vsra_vv_nxv16i8_sext_zext(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) {
+; CHECK-LABEL: vsra_vv_nxv16i8_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsext.vf4 v16, v8
+; CHECK-NEXT: vzext.vf4 v24, v8
+; CHECK-NEXT: vsra.vv v8, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v16, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 16 x i8> %va to <vscale x 16 x i32>
+ %zexted_vb = zext <vscale x 16 x i8> %va to <vscale x 16 x i32>
+ %expand = ashr <vscale x 16 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 16 x i32> %expand to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %vc
+}
+
define <vscale x 16 x i8> @vsra_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vsra_vx_nxv16i8:
; CHECK: # %bb.0:
@@ -250,6 +345,21 @@ define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
ret <vscale x 1 x i16> %vc
}
+define <vscale x 1 x i16> @vsra_vv_nxv1i16_sext_zext(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vsra_vv_nxv1i16_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsra.wv v8, v9, v8
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 1 x i16> %va to <vscale x 1 x i32>
+ %zexted_vb = zext <vscale x 1 x i16> %va to <vscale x 1 x i32>
+ %expand = ashr <vscale x 1 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 1 x i32> %expand to <vscale x 1 x i16>
+ ret <vscale x 1 x i16> %vc
+}
+
define <vscale x 1 x i16> @vsra_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vsra_vx_nxv1i16:
; CHECK: # %bb.0:
@@ -284,6 +394,21 @@ define <vscale x 2 x i16> @vsra_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
ret <vscale x 2 x i16> %vc
}
+define <vscale x 2 x i16> @vsra_vv_nxv2i16_sext_zext(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vsra_vv_nxv2i16_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsext.vf2 v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsra.wv v8, v9, v8
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 2 x i16> %va to <vscale x 2 x i32>
+ %zexted_vb = zext <vscale x 2 x i16> %va to <vscale x 2 x i32>
+ %expand = ashr <vscale x 2 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 2 x i32> %expand to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %vc
+}
+
define <vscale x 2 x i16> @vsra_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vsra_vx_nxv2i16:
; CHECK: # %bb.0:
@@ -318,6 +443,22 @@ define <vscale x 4 x i16> @vsra_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
ret <vscale x 4 x i16> %vc
}
+define <vscale x 4 x i16> @vsra_vv_nxv4i16_sext_zext(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vsra_vv_nxv4i16_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsra.wv v9, v10, v8
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 4 x i16> %va to <vscale x 4 x i32>
+ %zexted_vb = zext <vscale x 4 x i16> %va to <vscale x 4 x i32>
+ %expand = ashr <vscale x 4 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 4 x i32> %expand to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %vc
+}
+
define <vscale x 4 x i16> @vsra_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vsra_vx_nxv4i16:
; CHECK: # %bb.0:
@@ -352,6 +493,22 @@ define <vscale x 8 x i16> @vsra_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
ret <vscale x 8 x i16> %vc
}
+define <vscale x 8 x i16> @vsra_vv_nxv8i16_sext_zext(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vsra_vv_nxv8i16_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsra.wv v10, v12, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 8 x i16> %va to <vscale x 8 x i32>
+ %zexted_vb = zext <vscale x 8 x i16> %va to <vscale x 8 x i32>
+ %expand = ashr <vscale x 8 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 8 x i32> %expand to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %vc
+}
+
define <vscale x 8 x i16> @vsra_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vsra_vx_nxv8i16:
; CHECK: # %bb.0:
@@ -386,6 +543,22 @@ define <vscale x 16 x i16> @vsra_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
ret <vscale x 16 x i16> %vc
}
+define <vscale x 16 x i16> @vsra_vv_nxv16i16_sext_zext(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) {
+; CHECK-LABEL: vsra_vv_nxv16i16_sext_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vsext.vf2 v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsra.wv v12, v16, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %sexted_va = sext <vscale x 16 x i16> %va to <vscale x 16 x i32>
+ %zexted_vb = zext <vscale x 16 x i16> %va to <vscale x 16 x i32>
+ %expand = ashr <vscale x 16 x i32> %sexted_va, %zexted_vb
+ %vc = trunc <vscale x 16 x i32> %expand to <vscale x 16 x i16>
+ ret <vscale x 16 x i16> %vc
+}
+
define <vscale x 16 x i16> @vsra_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vsra_vx_nxv16i16:
; CHECK: # %bb.0:
From fcf67a4ca7ad1705a3f3c5c2af376acad381cbe1 Mon Sep 17 00:00:00 2001
From: LWenH <924105575 at qq.com>
Date: Wed, 6 Sep 2023 17:38:18 +0800
Subject: [PATCH 2/3] [RISCV] combine trunc (sra sext (X), zext (Y)) to sra (X,
smin (Y, scalarsize(Y) - 1))
For i8/i16 element-wise vector arithmetic right shifts, the source value
is first sign-extended to i32 and the shift amount is zero-extended to
i32 to perform the vsra instruction, followed by a trunc to get the
final calculation result. For RVV, the truncate is lowered into n levels
of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncation
restriction, and such a pattern is later expanded into a series of
"vsetvli" and "vnsrl" instructions. Instead, we can use
smin(Y, ScalarSizeInBits(Y) - 1) to determine the actual shift amount
for the vsra instruction, because we only care about the low log2(SEW)
bits of the shift amount.
For validation of the transformation, see the Alive2 proof:
https://alive2.llvm.org/ce/z/wXLrLT
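As an editorial sanity check of the scalar semantics (a sketch, not part
of the patch), the following C++ program exhaustively verifies for i8
elements that truncating the widened shift matches shifting by the
clamped amount wherever the wide shift is well defined (amount < 32). It
assumes the C++20 guarantee that >> on negative signed values is an
arithmetic shift:

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (int x = -128; x < 128; ++x) {
      // Shift amounts >= 32 make the widened i32 ashr poison, so the
      // two forms only need to agree on the defined range [0, 32).
      for (int y = 0; y < 32; ++y) {
        int8_t wide = static_cast<int8_t>(static_cast<int32_t>(x) >> y);
        int8_t narrow = static_cast<int8_t>(x >> std::min(y, 7));
        assert(wide == narrow);
      }
    }
    return 0;
  }

For amounts in [8, 31] the sign-extended value shifts down to pure sign
bits, which is exactly what a shift by 7 produces after truncation, so
clamping with smin loses nothing.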
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 58 +++++++++++
llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll | 103 ++++++++------------
2 files changed, 98 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d7112461a18158f..b640eec27d39e5c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13639,6 +13639,64 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
}
return SDValue();
+ case RISCVISD::TRUNCATE_VECTOR_VL: {
+ // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
+ // This benefits the cases where X and Y are both low-precision vectors
+ // of the same value type. Since the truncate is lowered into
+ // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncation
+ // restriction, such a pattern is expanded into a series of "vsetvli"
+ // and "vnsrl" instructions before reaching this point.
+
+ auto IsTruncNode = [](SDValue V) {
+ if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+
+ SDValue VL = V.getOperand(2);
+ auto *C = dyn_cast<ConstantSDNode>(VL);
+
+ // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX together with a VMSET_VL mask
+ bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
+ (isa<RegisterSDNode>(VL) &&
+ cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
+
+ return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ IsVLMAXForVMSET;
+ };
+
+ SDValue Op = N->getOperand(0);
+
+ // We first need to find the innermost TRUNCATE_VECTOR_VL node
+ // in order to match this pattern.
+ while (IsTruncNode(Op)) {
+ if (!Op.hasOneUse())
+ return SDValue();
+ Op = Op.getOperand(0);
+ }
+
+ if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+
+ if (N00.getValueType().isVector() &&
+ N00.getValueType() == N10.getValueType() && N->hasOneUse() &&
+ N->getValueType(0) == N10.getValueType()) {
+ unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
+
+ SDValue SMin = DAG.getNode(
+ ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
+ DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
+
+ return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
+ }
+ }
+ }
+ break;
+ }
case ISD::TRUNCATE:
return performTRUNCATECombine(N, DAG, Subtarget);
case ISD::SELECT:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 2a40b59736c297b..738e9cf805b46f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -15,14 +15,10 @@ define <vscale x 1 x i8> @vsra_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8
define <vscale x 1 x i8> @vsra_vv_nxv1i8_sext_zext(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vsra_vv_nxv1i8_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vsext.vf4 v9, v8
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vsra.vv v8, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 1 x i8> %va to <vscale x 1 x i32>
%zexted_vb = zext <vscale x 1 x i8> %va to <vscale x 1 x i32>
@@ -68,14 +64,10 @@ define <vscale x 2 x i8> @vsra_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8
define <vscale x 2 x i8> @vsra_vv_nxv2i8_sext_zext(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
; CHECK-LABEL: vsra_vv_nxv2i8_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vsext.vf4 v9, v8
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vsra.vv v8, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 2 x i8> %va to <vscale x 2 x i32>
%zexted_vb = zext <vscale x 2 x i8> %va to <vscale x 2 x i32>
@@ -121,14 +113,10 @@ define <vscale x 4 x i8> @vsra_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8
define <vscale x 4 x i8> @vsra_vv_nxv4i8_sext_zext(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
; CHECK-LABEL: vsra_vv_nxv4i8_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vsra.vv v8, v10, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 4 x i8> %va to <vscale x 4 x i32>
%zexted_vb = zext <vscale x 4 x i8> %va to <vscale x 4 x i32>
@@ -174,14 +162,10 @@ define <vscale x 8 x i8> @vsra_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8
define <vscale x 8 x i8> @vsra_vv_nxv8i8_sext_zext(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
; CHECK-LABEL: vsra_vv_nxv8i8_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v16, v8
-; CHECK-NEXT: vsra.vv v8, v12, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 8 x i8> %va to <vscale x 8 x i32>
%zexted_vb = zext <vscale x 8 x i8> %va to <vscale x 8 x i32>
@@ -227,14 +211,10 @@ define <vscale x 16 x i8> @vsra_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
define <vscale x 16 x i8> @vsra_vv_nxv16i8_sext_zext(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) {
; CHECK-LABEL: vsra_vv_nxv16i8_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v8
-; CHECK-NEXT: vzext.vf4 v24, v8
-; CHECK-NEXT: vsra.vv v8, v16, v24
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmin.vx v10, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v10
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 16 x i8> %va to <vscale x 16 x i32>
%zexted_vb = zext <vscale x 16 x i8> %va to <vscale x 16 x i32>
@@ -348,10 +328,10 @@ define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
define <vscale x 1 x i16> @vsra_vv_nxv1i16_sext_zext(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
; CHECK-LABEL: vsra_vv_nxv1i16_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsra.wv v8, v9, v8
+; CHECK-NEXT: li a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 1 x i16> %va to <vscale x 1 x i32>
%zexted_vb = zext <vscale x 1 x i16> %va to <vscale x 1 x i32>
@@ -397,10 +377,10 @@ define <vscale x 2 x i16> @vsra_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
define <vscale x 2 x i16> @vsra_vv_nxv2i16_sext_zext(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
; CHECK-LABEL: vsra_vv_nxv2i16_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vsext.vf2 v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsra.wv v8, v9, v8
+; CHECK-NEXT: li a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 2 x i16> %va to <vscale x 2 x i32>
%zexted_vb = zext <vscale x 2 x i16> %va to <vscale x 2 x i32>
@@ -446,11 +426,10 @@ define <vscale x 4 x i16> @vsra_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
define <vscale x 4 x i16> @vsra_vv_nxv4i16_sext_zext(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
; CHECK-LABEL: vsra_vv_nxv4i16_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsra.wv v9, v10, v8
-; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: li a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vx v9, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v9
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 4 x i16> %va to <vscale x 4 x i32>
%zexted_vb = zext <vscale x 4 x i16> %va to <vscale x 4 x i32>
@@ -496,11 +475,10 @@ define <vscale x 8 x i16> @vsra_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
define <vscale x 8 x i16> @vsra_vv_nxv8i16_sext_zext(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
; CHECK-LABEL: vsra_vv_nxv8i16_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsra.wv v10, v12, v8
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: li a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmin.vx v10, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v10
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 8 x i16> %va to <vscale x 8 x i32>
%zexted_vb = zext <vscale x 8 x i16> %va to <vscale x 8 x i32>
@@ -546,11 +524,10 @@ define <vscale x 16 x i16> @vsra_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
define <vscale x 16 x i16> @vsra_vv_nxv16i16_sext_zext(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) {
; CHECK-LABEL: vsra_vv_nxv16i16_sext_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vsext.vf2 v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vnsra.wv v12, v16, v8
-; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: li a0, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmin.vx v12, v8, a0
+; CHECK-NEXT: vsra.vv v8, v8, v12
; CHECK-NEXT: ret
%sexted_va = sext <vscale x 16 x i16> %va to <vscale x 16 x i32>
%zexted_vb = zext <vscale x 16 x i16> %va to <vscale x 16 x i32>
From 53a4018ea15e399916f8a5f4c88f2c6b8af051ef Mon Sep 17 00:00:00 2001
From: LWenH <924105575 at qq.com>
Date: Sat, 16 Sep 2023 01:33:52 +0800
Subject: [PATCH 3/3] Address comment and reformat the code
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b640eec27d39e5c..59b6ec67137cb48 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13646,19 +13646,15 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncation
// restriction, such a pattern is expanded into a series of "vsetvli"
// and "vnsrl" instructions before reaching this point.
-
auto IsTruncNode = [](SDValue V) {
if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
return false;
-
SDValue VL = V.getOperand(2);
auto *C = dyn_cast<ConstantSDNode>(VL);
-
// Assume all TRUNCATE_VECTOR_VL nodes use VLMAX together with a VMSET_VL mask
bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
(isa<RegisterSDNode>(VL) &&
cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
-
return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
IsVLMAXForVMSET;
};
@@ -13676,21 +13672,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
-
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
-
if (N00.getValueType().isVector() &&
- N00.getValueType() == N10.getValueType() && N->hasOneUse() &&
+ N00.getValueType() == N10.getValueType() &&
N->getValueType(0) == N10.getValueType()) {
unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
-
SDValue SMin = DAG.getNode(
ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
-
return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
}
}