[llvm] b64ddae - [RISCV] Lower experimental_get_vector_length intrinsic to vsetvli for some cases.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 5 15:02:21 PDT 2023
Author: Craig Topper
Date: 2023-06-05T15:02:11-07:00
New Revision: b64ddae8a294605819470ce2f8d8b4751d0ffe12
URL: https://github.com/llvm/llvm-project/commit/b64ddae8a294605819470ce2f8d8b4751d0ffe12
DIFF: https://github.com/llvm/llvm-project/commit/b64ddae8a294605819470ce2f8d8b4751d0ffe12.diff
LOG: [RISCV] Lower experimental_get_vector_length intrinsic to vsetvli for some cases.
This patch lowers the intrinsic to vsetvli when the AVL is i32 or XLenVT and
the VF is a power of 2 in the range [1, 64]. VLEN=32 is not supported
because we don't have a valid type mapping for it. VF=1 is not supported
when only Zve32* is available.
The element width is used to set the SEW for the vsetvli if possible.
Otherwise we use SEW=8.
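For illustration (mirroring the updated tests below): with VF=2, a call like

  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true)

is now selected to a single

  vsetvli a0, a0, e8, mf4, ta, ma

instead of the generic csrr/srli/bltu expansion.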
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D150824
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e8b6560036f08..acdbaccdaed87 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1175,6 +1175,33 @@ MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
return Subtarget.getXLenVT();
}
+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+ unsigned VF,
+ bool IsScalable) const {
+ if (!Subtarget.hasVInstructions())
+ return true;
+
+ if (!IsScalable)
+ return true;
+
+ if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+ return true;
+
+ // Don't allow VF=1 if those types aren't legal.
+ if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+ return true;
+
+ // VLEN=32 support is incomplete.
+ if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+ return true;
+
+ // The maximum VF is for the smallest element width with LMUL=8.
+ // VF must be a power of 2.
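+ // For example, with RVVBitsPerBlock=64 this evaluates to (64 / 8) * 8 = 64.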
+ unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
+ return VF > MaxVF || !isPowerOf2_32(VF);
+}
+
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
@@ -6623,6 +6650,48 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
+// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
+// scalable vector llvm.get.vector.length for now.
+//
+// We need to convert from a scalable VF to a vsetvli with VLMax equal to
+// (vscale * VF). The vscale and VF are independent of element width. We use
+// SEW=8 for the vsetvli because it is the only element width that supports all
+// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
+// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
+// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
+// SEW and LMUL are better for the surrounding vector instructions.
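+//
+// For example, with RVVBitsPerBlock=64, LMUL=1 at SEW=8 corresponds to VF=8,
+// so VF=2 is lowered with LMUL=1/2 (e8, mf4) and VF=32 with LMUL=4 (e8, m4).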
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // The smallest LMUL is only valid for the smallest element width.
+ const unsigned ElementWidth = 8;
+
+ // Determine the VF that corresponds to LMUL 1 for ElementWidth.
+ unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
+ // We don't support VF==1 with ELEN==32.
+ unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+
+ unsigned VF = N->getConstantOperandVal(2);
+ assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
+ "Unexpected VF");
+
+ bool Fractional = VF < LMul1VF;
+ unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
+ unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
+ unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
+
+ SDLoc DL(N);
+
+ SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
+ SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
+
+ SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+
+ SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+}
+
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(0);
@@ -6648,6 +6717,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
+ case Intrinsic::experimental_get_vector_length:
+ return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s:
assert(Op.getValueType() == XLenVT && "Unexpected VT!");
return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -9471,6 +9542,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
default:
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
+ case Intrinsic::experimental_get_vector_length: {
+ SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
case Intrinsic::riscv_orc_b: {
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 69d5dffa15d98..65c9cd763c6f3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -870,6 +870,9 @@ class RISCVTargetLowering : public TargetLowering {
MVT getVPExplicitVectorLengthTy() const override;
+ bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
+ bool IsScalable) const override;
+
/// RVV code generation for fixed length vectors does not lower all
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
index b002cbc6cd4d5..1d42b6e3937c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i1)
declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i1)
@@ -21,51 +21,19 @@ define i32 @vector_length_i16(i16 zeroext %tc) {
}
define i32 @vector_length_i32(i32 zeroext %tc) {
-; RV32-LABEL: vector_length_i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: bltu a0, a1, .LBB1_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB1_2:
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vector_length_i32:
-; RV64: # %bb.0:
-; RV64-NEXT: sext.w a0, a0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 2
-; RV64-NEXT: bltu a0, a1, .LBB1_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB1_2:
-; RV64-NEXT: ret
+; CHECK-LABEL: vector_length_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT: ret
%a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true)
ret i32 %a
}
define i32 @vector_length_XLen(iXLen zeroext %tc) {
-; RV32-LABEL: vector_length_XLen:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: bltu a0, a1, .LBB2_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: .LBB2_2:
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vector_length_XLen:
-; RV64: # %bb.0:
-; RV64-NEXT: sext.w a0, a0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 2
-; RV64-NEXT: bltu a0, a1, .LBB2_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB2_2:
-; RV64-NEXT: ret
+; CHECK-LABEL: vector_length_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT: ret
%a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 true)
ret i32 %a
}
@@ -128,3 +96,237 @@ define i32 @vector_length_XLen_fixed(iXLen zeroext %tc) {
%a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 false)
ret i32 %a
}
+
+define i32 @vector_length_vf1_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf1_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf8, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 1, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf1_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf1_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf8, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 1, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf2_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf2_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf2_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf4_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf4_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf2, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 4, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf4_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf4_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, mf2, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 4, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf8_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf8_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf8_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf8_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf16_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf16_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m2, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf16_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf16_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m2, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf32_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m4, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 32, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf32_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf32_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m4, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 32, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_vf64_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 64, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_vf64_XLen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 64, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf128_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_vf128_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: bltu a0, a1, .LBB20_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB20_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_vf128_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: bltu a0, a1, .LBB20_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB20_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 128, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf128_XLen(iXLen zeroext %tc) {
+; RV32-LABEL: vector_length_vf128_XLen:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: bltu a0, a1, .LBB21_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB21_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_vf128_XLen:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: bltu a0, a1, .LBB21_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB21_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 128, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf3_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_vf3_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: bltu a0, a1, .LBB22_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB22_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_vf3_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: bltu a0, a1, .LBB22_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB22_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 3, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) {
+; RV32-LABEL: vector_length_vf3_XLen:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a1, a1, 3
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: bltu a0, a1, .LBB23_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB23_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_vf3_XLen:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a1, a1, 3
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: bltu a0, a1, .LBB23_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB23_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 3, i1 true)
+ ret i32 %a
+}