[llvm] c74ab89 - [RISCV] Ensure small mask BUILD_VECTORs aren't expanded
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Thu May 20 11:20:27 PDT 2021
Author: Fraser Cormack
Date: 2021-05-20T19:12:29+01:00
New Revision: c74ab891fc9ce4436a3360e14becea9c6794837f
URL: https://github.com/llvm/llvm-project/commit/c74ab891fc9ce4436a3360e14becea9c6794837f
DIFF: https://github.com/llvm/llvm-project/commit/c74ab891fc9ce4436a3360e14becea9c6794837f.diff
LOG: [RISCV] Ensure small mask BUILD_VECTORs aren't expanded
The default expansion for BUILD_VECTORs -- save for going through
shuffles -- is to go through the stack. This method only works when the
type is at least byte-sized, so for v2i1 and v4i1 we would crash.
This patch ensures that small mask-type BUILD_VECTORs are always handled
without crashing. We lower to a SETCC of the equivalent i8 type.
This also exposes some pre-existing issues where the lowering chosen
when optimizing for size results in larger code than when not. Those
will be tackled in future patches.
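For illustration, the non-constant v2i1 case covered by the new
buildvec_mask_nonconst_v2i1 test added below is one such input that
previously hit the stack expansion:

define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
  %1 = insertelement <2 x i1> undef, i1 %x, i32 0
  %2 = insertelement <2 x i1> %1, i1 %y, i32 1
  ret <2 x i1> %2
}

With this patch it is instead assembled as an equivalent v2i8
BUILD_VECTOR, ANDed with 1, and compared against zero (vmsne.vi) to
produce the mask register.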
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D102767
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4b06e7feee78..0c219f763047 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1367,12 +1367,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// according to the size of the final vector - use i8 chunks rather than
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
- // If we have to use more than one INSERT_VECTOR_ELT then this optimization
- // is likely to increase code size; avoid peforming it in such a case.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
- (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ // If we have to use more than one INSERT_VECTOR_ELT then this
+ // optimization is likely to increase code size; avoid performing it in
+ // such a case. We can go through the stack as long as we're at least
+ // byte-sized.
+ if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
+ return SDValue();
// Now we can create our integer vector type. Note that it may be larger
// than the resulting mask type: v4i1 would use v1i8 as its integer type.
MVT IntegerViaVecVT =
@@ -1427,20 +1430,29 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return Vec;
}
- // A splat can be lowered as a SETCC. For each fixed-length mask vector
- // type, we have a legal equivalently-sized i8 type, so we can use that.
+ // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
+ // vector type, we have a legal equivalently-sized i8 type, so we can use
+ // that.
+ MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
+ SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+
+ SDValue WideVec;
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ // For a splat, perform a scalar truncate before creating the wider
+ // vector.
assert(Splat.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
- MVT InterVT = VT.changeVectorElementType(MVT::i8);
Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
DAG.getConstant(1, DL, XLenVT));
- Splat = DAG.getSplatBuildVector(InterVT, DL, Splat);
- SDValue Zero = DAG.getConstant(0, DL, InterVT);
- return DAG.getSetCC(DL, VT, Splat, Zero, ISD::SETNE);
+ WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+ } else {
+ SmallVector<SDValue, 8> Ops(Op->op_values());
+ WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
+ SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
+ WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
}
- return SDValue();
+ return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index b25c05f5f599..11cbe610f654 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -8,6 +8,64 @@
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
+define <1 x i1> @buildvec_mask_nonconst_v1i1(i1 %x) {
+; CHECK-LABEL: buildvec_mask_nonconst_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %1 = insertelement <1 x i1> undef, i1 %x, i32 0
+ ret <1 x i1> %1
+}
+
+define <1 x i1> @buildvec_mask_optsize_nonconst_v1i1(i1 %x) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a0
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %1 = insertelement <1 x i1> undef, i1 %x, i32 0
+ ret <1 x i1> %1
+}
+
+define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a2, 2, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmv.s.x v25, a0
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %1 = insertelement <2 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <2 x i1> %1, i1 %y, i32 1
+ ret <2 x i1> %2
+}
+
+; FIXME: optsize isn't smaller than the code above
+define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a1, 15(sp)
+; CHECK-NEXT: sb a0, 14(sp)
+; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu
+; CHECK-NEXT: addi a0, sp, 14
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <2 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <2 x i1> %1, i1 %y, i32 1
+ ret <2 x i1> %2
+}
+
define <3 x i1> @buildvec_mask_v1i1() {
; CHECK-LABEL: buildvec_mask_v1i1:
; CHECK: # %bb.0:
@@ -38,6 +96,73 @@ define <4 x i1> @buildvec_mask_v4i1() {
ret <4 x i1> <i1 0, i1 1, i1 1, i1 0>
}
+define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 3
+; CHECK-NEXT: vsetivli a3, 1, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.s.x v0, a2
+; CHECK-NEXT: vsetivli a2, 4, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmerge.vxm v25, v25, a0, v0
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %1 = insertelement <4 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <4 x i1> %2, i1 %y, i32 2
+ %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+ ret <4 x i1> %4
+}
+
+; FIXME: optsize isn't smaller than the code above
+define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a1, 15(sp)
+; CHECK-NEXT: sb a1, 14(sp)
+; CHECK-NEXT: sb a0, 13(sp)
+; CHECK-NEXT: sb a0, 12(sp)
+; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
+; CHECK-NEXT: addi a0, sp, 12
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <4 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <4 x i1> %2, i1 %y, i32 2
+ %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+ ret <4 x i1> %4
+}
+
+define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a1, 15(sp)
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: sb a1, 14(sp)
+; CHECK-NEXT: sb a0, 13(sp)
+; CHECK-NEXT: sb zero, 12(sp)
+; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
+; CHECK-NEXT: addi a0, sp, 12
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <4 x i1> undef, i1 0, i32 0
+ %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <4 x i1> %2, i1 1, i32 2
+ %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+ ret <4 x i1> %4
+}
+
define <8 x i1> @buildvec_mask_v8i1() {
; CHECK-LABEL: buildvec_mask_v8i1:
; CHECK: # %bb.0:
@@ -48,6 +173,124 @@ define <8 x i1> @buildvec_mask_v8i1() {
ret <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>
}
+define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 19
+; CHECK-NEXT: vsetivli a3, 1, e8,mf8,ta,mu
+; CHECK-NEXT: vmv.s.x v0, a2
+; CHECK-NEXT: vsetivli a2, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vmv.v.x v25, a1
+; CHECK-NEXT: vmerge.vxm v25, v25, a0, v0
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: ret
+ %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <8 x i1> %2, i1 %y, i32 2
+ %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+ %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+ %6 = insertelement <8 x i1> %5, i1 %y, i32 5
+ %7 = insertelement <8 x i1> %6, i1 %y, i32 6
+ %8 = insertelement <8 x i1> %7, i1 %y, i32 7
+ ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
+; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a2, 15(sp)
+; CHECK-NEXT: sb zero, 14(sp)
+; CHECK-NEXT: sb a3, 13(sp)
+; CHECK-NEXT: sb a0, 12(sp)
+; CHECK-NEXT: sb a1, 11(sp)
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: sb a1, 10(sp)
+; CHECK-NEXT: sb a0, 9(sp)
+; CHECK-NEXT: sb a0, 8(sp)
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <8 x i1> %2, i1 1, i32 2
+ %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+ %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+ %6 = insertelement <8 x i1> %5, i1 %w, i32 5
+ %7 = insertelement <8 x i1> %6, i1 0, i32 6
+ %8 = insertelement <8 x i1> %7, i1 %z, i32 7
+ ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a2, 15(sp)
+; CHECK-NEXT: sb zero, 14(sp)
+; CHECK-NEXT: sb a3, 13(sp)
+; CHECK-NEXT: sb a0, 12(sp)
+; CHECK-NEXT: sb a1, 11(sp)
+; CHECK-NEXT: addi a1, zero, 1
+; CHECK-NEXT: sb a1, 10(sp)
+; CHECK-NEXT: sb a0, 9(sp)
+; CHECK-NEXT: sb a0, 8(sp)
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <8 x i1> %2, i1 1, i32 2
+ %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+ %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+ %6 = insertelement <8 x i1> %5, i1 %w, i32 5
+ %7 = insertelement <8 x i1> %6, i1 0, i32 6
+ %8 = insertelement <8 x i1> %7, i1 %z, i32 7
+ ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sb a1, 15(sp)
+; CHECK-NEXT: sb a1, 14(sp)
+; CHECK-NEXT: sb a1, 13(sp)
+; CHECK-NEXT: sb a0, 12(sp)
+; CHECK-NEXT: sb a1, 11(sp)
+; CHECK-NEXT: sb a1, 10(sp)
+; CHECK-NEXT: sb a0, 9(sp)
+; CHECK-NEXT: sb a0, 8(sp)
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+ %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+ %3 = insertelement <8 x i1> %2, i1 %y, i32 2
+ %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+ %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+ %6 = insertelement <8 x i1> %5, i1 %y, i32 5
+ %7 = insertelement <8 x i1> %6, i1 %y, i32 6
+ %8 = insertelement <8 x i1> %7, i1 %y, i32 7
+ ret <8 x i1> %8
+}
+
define <10 x i1> @buildvec_mask_v10i1() {
; CHECK-LABEL: buildvec_mask_v10i1:
; CHECK: # %bb.0:
@@ -518,13 +761,13 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
;
; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
; RV32-LMULMAX4: # %bb.0:
-; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_0)
+; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_0)
; RV32-LMULMAX4-NEXT: addi a1, zero, 64
; RV32-LMULMAX4-NEXT: vsetvli a1, a1, e8,m4,ta,mu
; RV32-LMULMAX4-NEXT: vle1.v v0, (a0)
-; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI10_1)
-; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI10_1)
+; RV32-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_1)
+; RV32-LMULMAX4-NEXT: addi a0, a0, %lo(.LCPI21_1)
; RV32-LMULMAX4-NEXT: vle1.v v8, (a0)
; RV32-LMULMAX4-NEXT: ret
;
@@ -551,8 +794,8 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
;
; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
; RV32-LMULMAX8: # %bb.0:
-; RV32-LMULMAX8-NEXT: lui a0, %hi(.LCPI10_0)
-; RV32-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX8-NEXT: lui a0, %hi(.LCPI21_0)
+; RV32-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI21_0)
; RV32-LMULMAX8-NEXT: addi a1, zero, 128
; RV32-LMULMAX8-NEXT: vsetvli a1, a1, e8,m8,ta,mu
; RV32-LMULMAX8-NEXT: vle1.v v0, (a0)
@@ -560,8 +803,8 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
;
; RV64-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
; RV64-LMULMAX8: # %bb.0:
-; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI10_0)
-; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI10_0)
+; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI21_0)
+; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI21_0)
; RV64-LMULMAX8-NEXT: addi a1, zero, 128
; RV64-LMULMAX8-NEXT: vsetvli a1, a1, e8,m8,ta,mu
; RV64-LMULMAX8-NEXT: vle1.v v0, (a0)