[llvm] fd465f3 - [RISCV] Move vmv_s_x and vfmv_s_f special casing to DAG combine
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 30 12:22:54 PDT 2023
Author: Philip Reames
Date: 2023-08-30T12:04:48-07:00
New Revision: fd465f377c541bbcfb12c1268bf725deaf23d2e0
URL: https://github.com/llvm/llvm-project/commit/fd465f377c541bbcfb12c1268bf725deaf23d2e0
DIFF: https://github.com/llvm/llvm-project/commit/fd465f377c541bbcfb12c1268bf725deaf23d2e0.diff
LOG: [RISCV] Move vmv_s_x and vfmv_s_f special casing to DAG combine
We'd discussed this in the original set of patches months ago, but decided against it. I think we should reverse that decision here: the code is significantly more readable, and we also pick up cases we'd previously missed because we weren't calling the appropriate helper routine.
Differential Revision: https://reviews.llvm.org/D158854
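
For context, the kind of case picked up is visible in the test updates below: inserting a small immediate into an i64 vector at LMUL > 1. A minimal sketch of such an input follows; the IR body is reconstructed from the insertelt_c6_v8i64 test updated below rather than copied from it, so treat it as illustrative only.

  ; Insert the constant 6 into an <8 x i64> (m4 on this configuration) at a
  ; variable index. The scalar materialization is the part this patch changes.
  define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
    %b = insertelement <8 x i64> %a, i64 6, i32 %idx
    ret <8 x i64> %b
  }

With the scalar insert handled in the DAG combine, the 6 is now materialized with a single vmv.v.i at m1 instead of li a1, 6 followed by vmv.s.x, as shown in the fixed-vectors-insert.ll diff below.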
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 92d7d7a0032e46..7d177b26efb4e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3698,20 +3698,10 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
DAG.getConstant(0, DL, XLenVT));
}
- if (VT.isFloatingPoint()) {
- // TODO: Use vmv.v.i for appropriate constants
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
- }
+ if (VT.isFloatingPoint())
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
// Avoid the tricky legalization cases by falling back to using the
// splat code which already handles it gracefully.
@@ -3727,24 +3717,8 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
- // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
- // higher would involve overly constraining the register allocator for
- // no purpose.
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
- if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
- VT.bitsLE(getLMUL1VT(VT)))
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
- }
- // Use M1 or smaller to avoid over constraining register allocation
- const MVT M1VT = getLMUL1VT(VT);
- auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
- SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
- DAG.getUNDEF(InnerVT), Scalar, VL);
- if (VT != InnerVT)
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- DAG.getUNDEF(VT),
- Result, DAG.getConstant(0, DL, XLenVT));
- return Result;
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
+ DAG.getUNDEF(VT), Scalar, VL);
}
// Is this a shuffle extracts either the even or odd elements of a vector?
@@ -13386,6 +13360,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ const MVT XLenVT = Subtarget.getXLenVT();
+ SDLoc DL(N);
// Helper to call SimplifyDemandedBits on an operand of N where only some low
// bits are demanded. N will be added to the Worklist if it was not deleted.
@@ -13417,8 +13393,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DCI.CombineTo(N, Lo, Hi);
}
- SDLoc DL(N);
-
// It's cheaper to materialise two 32-bit integers than to load a double
// from the constant pool and transfer it to integer registers through the
// stack.
@@ -13752,7 +13726,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
EVT IndexVT = Index.getValueType();
- MVT XLenVT = Subtarget.getXLenVT();
// RISC-V indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
bool NeedsIdxLegalization =
@@ -14002,6 +13975,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Src.getOperand(0);
// TODO: Use insert_subvector/extract_subvector to change widen/narrow?
}
+ [[fallthrough]];
+ }
+ case RISCVISD::VMV_S_X_VL: {
+ const MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Scalar = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Use M1 or smaller to avoid over constraining register allocation
+ const MVT M1VT = getLMUL1VT(VT);
+ if (M1VT.bitsLT(VT) && Passthru.isUndef()) {
+ SDValue Result =
+ DAG.getNode(N->getOpcode(), DL, M1VT, Passthru, Scalar, VL);
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ Result, DAG.getConstant(0, DL, XLenVT));
+ return Result;
+ }
+
+ // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
+ // higher would involve overly constraining the register allocator for
+ // no purpose.
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
+ VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+
break;
}
case ISD::INTRINSIC_VOID:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index ea02b0280ead8e..097027ff529374 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -419,9 +419,8 @@ define void @insertelt_v8i64_0_store(ptr %x) {
define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, -1
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, a1
+; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
@@ -429,9 +428,8 @@ define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
;
; RV64-LABEL: insertelt_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a1
+; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
@@ -499,9 +497,8 @@ define void @insertelt_c6_v8i64_0_store(ptr %x) {
define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
; RV32-LABEL: insertelt_c6_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 6
; RV32-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, a1
+; RV32-NEXT: vmv.v.i v12, 6
; RV32-NEXT: addi a1, a0, 1
; RV32-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; RV32-NEXT: vslideup.vx v8, v12, a0
@@ -509,9 +506,8 @@ define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
;
; RV64-LABEL: insertelt_c6_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 6
; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a1
+; RV64-NEXT: vmv.v.i v12, 6
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 376b63be32f016..b1da392cfadf94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -696,18 +696,18 @@ define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3,
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vadd.vi v8, v8, 3
; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vse16.v v8, (a2)
; CHECK-NEXT: vse16.v v8, (a3)
; CHECK-NEXT: vse16.v v8, (a4)
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vmv.v.i v9, 4
+; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v9, (a5)
+; CHECK-NEXT: vse16.v v8, (a5)
; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v9, a0
; CHECK-NEXT: vse16.v v8, (a6)
; CHECK-NEXT: ret
store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 08a550f3eb4480..4cce6be15d5d89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -945,15 +945,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: li a1, 1
; RV64-NEXT: vmv.v.i v12, 7
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 4
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v16, a1
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v16, 1
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
index 79de1574b9e7c4..39f94eab2aa660 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -781,9 +781,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_c10(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 10
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 10
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
@@ -793,9 +793,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_idx_c10(<vscale x 2 x i64> %v, i32 %idx) {
; CHECK-LABEL: insertelt_nxv2i64_idx_c10:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 10
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
@@ -818,9 +817,9 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_0_cn1(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
@@ -830,9 +829,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
; CHECK-LABEL: insertelt_nxv2i64_idx_cn1:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, -1
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
index 0b7b722cf81dc8..5d5a2a3b898bc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
@@ -8,9 +8,9 @@ define i32 @splat_vector_split_i64() {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 3
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 3
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 4ef28252b2a9d8..9796ffa184ca86 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -668,10 +668,10 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmv.v.i v10, 1
; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
; RV32MV-NEXT: vand.vv v8, v8, v10
-; RV32MV-NEXT: li a0, 2
-; RV32MV-NEXT: vmv.s.x v10, a0
-; RV32MV-NEXT: li a0, 1
-; RV32MV-NEXT: vmv.s.x v12, a0
+; RV32MV-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; RV32MV-NEXT: vmv.v.i v10, 2
+; RV32MV-NEXT: vmv.v.i v12, 1
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT: vmv.v.i v14, 0
; RV32MV-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV32MV-NEXT: vslideup.vi v14, v12, 2