[llvm] f0a9aac - [RISCV] Use vmv.s.x for a constant build_vector when the entire size is less than 32 bits
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 7 17:15:13 PDT 2023
Author: Philip Reames
Date: 2023-08-07T17:15:05-07:00
New Revision: f0a9aacdb9a2192c34963fe13b947a656c41bdda
URL: https://github.com/llvm/llvm-project/commit/f0a9aacdb9a2192c34963fe13b947a656c41bdda
DIFF: https://github.com/llvm/llvm-project/commit/f0a9aacdb9a2192c34963fe13b947a656c41bdda.diff
LOG: [RISCV] Use vmv.s.x for a constant build_vector when the entire size is less than 32 bits
We already have a variant of this for splats, but we hadn't handled the case where a single copy of the wider element can be inserted to produce the entire required bit pattern. This shows up mostly in very small vector shuffle tests.
Differential Revision: https://reviews.llvm.org/D157299
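For illustration, a minimal standalone C++ sketch of the constant-packing step the patch performs (packConstants and the driver below are hypothetical helpers, not code from the tree): element 0 lands in the low bits and each subsequent element is shifted up by the element width. The in-tree lowering additionally skips undef elements and sign-extends the 32-bit value on RV64; see the diff below. Packing <4 x i8> <i8 1, i8 3, i8 6, i8 7> from the buildvec_no_vid_v4i8 test yields 0x07060301, which the new codegen materializes with lui/addi and a single vmv.s.x instead of a constant-pool load.

// Hypothetical sketch only; not the in-tree helper.
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack EltBits-wide constants into one integer, element 0 in the low bits.
static uint64_t packConstants(const std::vector<uint64_t> &Elts,
                              unsigned EltBits) {
  uint64_t EltMask = (EltBits >= 64) ? ~0ULL : ((1ULL << EltBits) - 1);
  uint64_t Packed = 0;
  for (size_t I = 0; I < Elts.size(); ++I)
    Packed |= (Elts[I] & EltMask) << (I * EltBits);
  return Packed;
}

int main() {
  // <4 x i8> <i8 1, i8 3, i8 6, i8 7> packs to 0x07060301, which the
  // buildvec_no_vid_v4i8 test below materializes as
  //   lui a0, 28768; addi a0, a0, 769; vmv.s.x v8, a0
  std::printf("0x%08llx\n",
              (unsigned long long)packConstants({1, 3, 6, 7}, 8));
  return 0;
}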
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e54be8e4dbabd..25a1dccd7b75a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3275,6 +3275,48 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
}
}
+  // For very small build_vectors, use a single scalar insert of a constant.
+  // TODO: Base this on constant rematerialization cost, not size.
+  const unsigned EltBitSize = VT.getScalarSizeInBits();
+  if (VT.getSizeInBits() <= 32 &&
+      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
+    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
+           "Unexpected sequence type");
+    // If we can use the original VL with the modified element type, this
+    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
+    // be moved into InsertVSETVLI?
+    unsigned ViaVecLen =
+        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
+    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
+
+    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
+    uint64_t SplatValue = 0;
+    // Construct the amalgamated value at this larger vector type.
+    for (const auto &OpIdx : enumerate(Op->op_values())) {
+      const auto &SeqV = OpIdx.value();
+      if (!SeqV.isUndef())
+        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
+                       << (OpIdx.index() * EltBitSize));
+    }
+
+    // On RV64, sign-extend from 32 to 64 bits where possible in order to
+    // achieve better constant materialization.
+    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
+      SplatValue = SignExtend64<32>(SplatValue);
+
+    SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
+                              DAG.getUNDEF(ViaVecVT),
+                              DAG.getConstant(SplatValue, DL, XLenVT),
+                              DAG.getConstant(0, DL, XLenVT));
+    if (ViaVecLen != 1)
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+                        MVT::getVectorVT(ViaIntVT, 1), Vec,
+                        DAG.getConstant(0, DL, XLenVT));
+    return DAG.getBitcast(VT, Vec);
+  }
+
+
  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
@@ -3283,7 +3325,6 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
-  unsigned EltBitSize = VT.getScalarSizeInBits();
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index dd6042becad4f5..4d63083f28ab51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -233,27 +233,49 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
define %x6v4i8 @buildvec_no_vid_v4i8() {
-; CHECK-LABEL: buildvec_no_vid_v4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_1)
-; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: slli a0, a0, 11
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: li a0, 2047
-; CHECK-NEXT: vmv.v.x v11, a0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: lui a0, %hi(.LCPI14_2)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT: vle8.v v13, (a0)
-; CHECK-NEXT: vmv.v.i v12, -2
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_no_vid_v4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 28768
+; RV32-NEXT: addi a0, a0, 769
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: lui a0, 28752
+; RV32-NEXT: addi a0, a0, 512
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: lui a0, 32768
+; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: lui a0, 28672
+; RV32-NEXT: addi a0, a0, 255
+; RV32-NEXT: vmv.s.x v11, a0
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV32-NEXT: vmv.v.i v12, -2
+; RV32-NEXT: lui a0, 1032144
+; RV32-NEXT: addi a0, a0, -257
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v13, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_no_vid_v4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 28768
+; RV64-NEXT: addiw a0, a0, 769
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: lui a0, 28752
+; RV64-NEXT: addiw a0, a0, 512
+; RV64-NEXT: vmv.s.x v9, a0
+; RV64-NEXT: lui a0, 32768
+; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: lui a0, 28672
+; RV64-NEXT: addiw a0, a0, 255
+; RV64-NEXT: vmv.s.x v11, a0
+; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV64-NEXT: vmv.v.i v12, -2
+; RV64-NEXT: lui a0, 1032144
+; RV64-NEXT: addiw a0, a0, -257
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v13, a0
+; RV64-NEXT: ret
%1 = insertvalue %x6v4i8 poison, <4 x i8> <i8 1, i8 3, i8 6, i8 7>, 0
%2 = insertvalue %x6v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, 1
%3 = insertvalue %x6v4i8 %2, <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, 2
@@ -662,22 +684,29 @@ define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
define <4 x i8> @buildvec_not_vid_v4i8_1() {
; CHECK-LABEL: buildvec_not_vid_v4i8_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: lui a0, 12320
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 0, i8 2, i8 3>
}
define <4 x i8> @buildvec_not_vid_v4i8_2() {
-; CHECK-LABEL: buildvec_not_vid_v4i8_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_not_vid_v4i8_2:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 16
+; RV32-NEXT: addi a0, a0, 771
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_not_vid_v4i8_2:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 16
+; RV64-NEXT: addiw a0, a0, 771
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a0
+; RV64-NEXT: ret
ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index defd3409c3e66a..78b5f01ebbb53e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -551,25 +551,49 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
; This shouldn't be interleaved
define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
-; V128-LABEL: unary_interleave_v4i8_invalid:
-; V128: # %bb.0:
-; V128-NEXT: lui a0, %hi(.LCPI19_0)
-; V128-NEXT: addi a0, a0, %lo(.LCPI19_0)
-; V128-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; V128-NEXT: vle8.v v10, (a0)
-; V128-NEXT: vrgather.vv v9, v8, v10
-; V128-NEXT: vmv1r.v v8, v9
-; V128-NEXT: ret
+; RV32-V128-LABEL: unary_interleave_v4i8_invalid:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: lui a0, 16
+; RV32-V128-NEXT: addi a0, a0, 768
+; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-V128-NEXT: vmv.s.x v10, a0
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vrgather.vv v9, v8, v10
+; RV32-V128-NEXT: vmv1r.v v8, v9
+; RV32-V128-NEXT: ret
;
-; V512-LABEL: unary_interleave_v4i8_invalid:
-; V512: # %bb.0:
-; V512-NEXT: lui a0, %hi(.LCPI19_0)
-; V512-NEXT: addi a0, a0, %lo(.LCPI19_0)
-; V512-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
-; V512-NEXT: vle8.v v10, (a0)
-; V512-NEXT: vrgather.vv v9, v8, v10
-; V512-NEXT: vmv1r.v v8, v9
-; V512-NEXT: ret
+; RV64-V128-LABEL: unary_interleave_v4i8_invalid:
+; RV64-V128: # %bb.0:
+; RV64-V128-NEXT: lui a0, 16
+; RV64-V128-NEXT: addiw a0, a0, 768
+; RV64-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-V128-NEXT: vmv.s.x v10, a0
+; RV64-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV64-V128-NEXT: vrgather.vv v9, v8, v10
+; RV64-V128-NEXT: vmv1r.v v8, v9
+; RV64-V128-NEXT: ret
+;
+; RV32-V512-LABEL: unary_interleave_v4i8_invalid:
+; RV32-V512: # %bb.0:
+; RV32-V512-NEXT: lui a0, 16
+; RV32-V512-NEXT: addi a0, a0, 768
+; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; RV32-V512-NEXT: vmv.s.x v10, a0
+; RV32-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-V512-NEXT: vrgather.vv v9, v8, v10
+; RV32-V512-NEXT: vmv1r.v v8, v9
+; RV32-V512-NEXT: ret
+;
+; RV64-V512-LABEL: unary_interleave_v4i8_invalid:
+; RV64-V512: # %bb.0:
+; RV64-V512-NEXT: lui a0, 16
+; RV64-V512-NEXT: addiw a0, a0, 768
+; RV64-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; RV64-V512-NEXT: vmv.s.x v10, a0
+; RV64-V512-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-V512-NEXT: vrgather.vv v9, v8, v10
+; RV64-V512-NEXT: vmv1r.v v8, v9
+; RV64-V512-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
ret <4 x i8> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index 7572514cd21231..0f9ba128a0c07c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -373,10 +373,10 @@ define <4 x i8> @vslide1up_4xi8_neg_undef_insert(<4 x i8> %v, i8 %b) {
define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert(<4 x i8> %v, i8 %b) {
; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI23_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: lui a0, 8208
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
@@ -397,15 +397,27 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) {
}
define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
-; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI25_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a0, 8208
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vslide1up_4xi8_neg_incorrect_insert3:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, 8208
+; RV64-NEXT: addiw a0, a0, 1
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>
ret <4 x i8> %v2
}