[llvm] c5e6c88 - [VP][SelectionDAG][RISCV] Add get_vector_length intrinsics and generic SelectionDAG support.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri May 26 09:06:45 PDT 2023
Author: Craig Topper
Date: 2023-05-26T09:06:38-07:00
New Revision: c5e6c886aabb36ab66b4ed835da243c2a3455ade
URL: https://github.com/llvm/llvm-project/commit/c5e6c886aabb36ab66b4ed835da243c2a3455ade
DIFF: https://github.com/llvm/llvm-project/commit/c5e6c886aabb36ab66b4ed835da243c2a3455ade.diff
LOG: [VP][SelectionDAG][RISCV] Add get_vector_length intrinsics and generic SelectionDAG support.
The generic implementation is umin(TC, VF * vscale).
Lowering to vsetvli for RISC-V will come in a future patch.
This patch is a pre-requisite to be able to CodeGen vectorized code from
D99750.
Reviewed By: reames, frasercrmck
Differential Revision: https://reviews.llvm.org/D149916
Added:
llvm/test/CodeGen/AArch64/get_vector_length.ll
llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
llvm/test/Verifier/get_vector_length.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/Verifier.cpp
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 83f0e350a82da..98f5c8c8a53c1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18069,6 +18069,54 @@ Arguments:
None.
+'``llvm.experimental.get.vector.length``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare i32 @llvm.experimental.get.vector.length.i32(i32 %cnt, i32 immarg %vf, i1 immarg %scalable)
+ declare i32 @llvm.experimental.get.vector.length.i64(i64 %cnt, i32 immarg %vf, i1 immarg %scalable)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.get.vector.length.*``' intrinsics take a number of
+elements to process and return how many of the elements can be processed
+with the requested vectorization factor.
+
+Arguments:
+""""""""""
+
+The first argument is an unsigned value of any scalar integer type and specifies
+the total number of elements to be processed. The second argument is an i32
+immediate for the vectorization factor. The third argument indicates if the
+vectorization factor should be multiplied by vscale.
+
+Semantics:
+""""""""""
+
+Returns a non-negative i32 value (explicit vector length) that is unknown at compile
+time and depends on the hardware specification.
+If the result value does not fit in the result type, then the result is
+a :ref:`poison value <poisonvalues>`.
+
+This intrinsic is intended to be used by loop vectorization with VP intrinsics
+in order to get the number of elements to process on each loop iteration. The
+result should be used to decrease the count for the next iteration until the
+count reaches zero.
+
+If the count is larger than the number of lanes in the type described by the
+last 2 arguments, this intrinsic may return a value less than the number of
+lanes implied by the type. The result will be at least as large as the result
+will be on any later loop iteration.
+
+This intrinsic will only return 0 if the input count is also 0. A non-zero input
+count will produce a non-zero result.
+
Matrix Intrinsics
-----------------
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 90208e65de0d2..fb69fdd80b106 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1066,6 +1066,9 @@ class SelectionDAG {
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
bool ConstantFold = true);
+ /// Return a node of type \p VT holding the element count \p EC. A scalable
+ /// count is built through getVScale using the known minimum value as the
+ /// multiplier; a fixed count becomes a plain constant.
+ SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+ bool ConstantFold = true);
+
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index dded1e59e9452..b2d73b286b0ad 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -459,6 +459,11 @@ class TargetLoweringBase {
return true;
}
+ /// Return true if the @llvm.experimental.get.vector.length intrinsic should
+ /// be expanded generically for the given count type, vectorization factor
+ /// and scalability flag. This default implementation always returns true;
+ /// when an override returns false, SelectionDAGBuilder passes the call to
+ /// visitTargetIntrinsic instead of expanding it.
+ virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
+ bool IsScalable) const {
+ return true;
+ }
+
// Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
// vecreduce(op(x, y)) for the reduction opcode RedOpc.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 9b49df277dd92..86f81124b8464 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2149,6 +2149,12 @@ def int_get_active_lane_mask:
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, IntrNoSync, IntrWillReturn]>;
+// Takes (count, VF, scalable) and returns an i32. The count may be any integer
+// type; the VF (operand 1) and the scalable flag (operand 2) must be immediates.
+def int_experimental_get_vector_length:
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i1_ty],
+ [IntrNoMem, IntrNoSync, IntrWillReturn,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+
def int_experimental_vp_splice:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ffa9998949b40..473497af56748 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1956,6 +1956,15 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
}
+/// Return a node of type \p VT holding the element count \p EC.
+///
+/// A scalable count is built through getVScale with the known minimum value
+/// as the multiplier; a fixed count becomes a plain constant. \p ConstantFold
+/// is forwarded to getVScale so that a caller asking for no folding actually
+/// gets it (it was previously accepted but ignored).
+SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+                                      bool ConstantFold) {
+  if (EC.isScalable())
+    return getVScale(DL, VT,
+                     APInt(VT.getSizeInBits(), EC.getKnownMinValue()),
+                     ConstantFold);
+
+  return getConstant(EC.getKnownMinValue(), DL, VT);
+}
+
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e5e8175602865..1621ae4858229 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7319,6 +7319,40 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, SetCC);
return;
}
+ // Lower llvm.experimental.get.vector.length(count, VF, scalable).
+ case Intrinsic::experimental_get_vector_length: {
+ assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
+ "Expected positive VF");
+ unsigned VF = cast<ConstantInt>(I.getOperand(1))->getZExtValue();
+ bool IsScalable = cast<ConstantInt>(I.getOperand(2))->isOne();
+
+ SDValue Count = getValue(I.getOperand(0));
+ EVT CountVT = Count.getValueType();
+
+ // If the target declines the generic expansion, hand the call over as a
+ // target intrinsic.
+ if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+ // Expand to a umin between the trip count and the maximum elements the type
+ // can hold.
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Extend the trip count to at least the result VT.
+ if (CountVT.bitsLT(VT)) {
+ Count = DAG.getNode(ISD::ZERO_EXTEND, sdl, VT, Count);
+ CountVT = VT;
+ }
+
+ // Upper bound on the result: VF elements, marked scalable when IsScalable
+ // is set, materialized in the (possibly widened) count type.
+ SDValue MaxEVL = DAG.getElementCount(sdl, CountVT,
+ ElementCount::get(VF, IsScalable));
+
+ SDValue UMin = DAG.getNode(ISD::UMIN, sdl, CountVT, Count, MaxEVL);
+ // Clip to the result type if needed.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, sdl, VT, UMin);
+
+ setValue(&I, Trunc);
+ return;
+ }
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 24333dd7c94c3..4ec940f2a67ee 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5469,6 +5469,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Call);
break;
}
+ // get_vector_length: operand 1 is the VF; reject zero and negative values.
+ case Intrinsic::experimental_get_vector_length: {
+ ConstantInt *VF = cast<ConstantInt>(Call.getArgOperand(1));
+ Check(!VF->isNegative() && !VF->isZero(),
+ "get_vector_length: VF must be positive", Call);
+ break;
+ }
case Intrinsic::masked_load: {
Check(Call.getType()->isVectorTy(), "masked_load: must return a vector",
Call);
diff --git a/llvm/test/CodeGen/AArch64/get_vector_length.ll b/llvm/test/CodeGen/AArch64/get_vector_length.ll
new file mode 100644
index 0000000000000..3a83edb339b84
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/get_vector_length.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
+
+declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i1)
+declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i1)
+declare i32 @llvm.experimental.get.vector.length.i64(i64, i32, i1)
+
+define i32 @vector_length_i16(i16 zeroext %tc) {
+; CHECK-LABEL: vector_length_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: csel w0, w0, w8, lo
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i16(i16 %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: csel w0, w0, w8, lo
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_i64(i64 %tc) {
+; CHECK-LABEL: vector_length_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x0, x0, x8, lo
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i64(i64 %tc, i32 2, i1 true)
+ ret i32 %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
new file mode 100644
index 0000000000000..b002cbc6cd4d5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i1)
+declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i1)
+declare i32 @llvm.experimental.get.vector.length.i64(i64, i32, i1)
+
+define i32 @vector_length_i16(i16 zeroext %tc) {
+; CHECK-LABEL: vector_length_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: bltu a0, a1, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i16(i16 %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: bltu a0, a1, .LBB1_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB1_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: bltu a0, a1, .LBB1_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB1_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_XLen(iXLen zeroext %tc) {
+; RV32-LABEL: vector_length_XLen:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: bltu a0, a1, .LBB2_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB2_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_XLen:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: bltu a0, a1, .LBB2_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB2_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_i16_fixed(i16 zeroext %tc) {
+; CHECK-LABEL: vector_length_i16_fixed:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 2
+; CHECK-NEXT: bltu a0, a1, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a0, 2
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i16(i16 %tc, i32 2, i1 false)
+ ret i32 %a
+}
+
+define i32 @vector_length_i32_fixed(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_i32_fixed:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 2
+; RV32-NEXT: bltu a0, a1, .LBB4_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a0, 2
+; RV32-NEXT: .LBB4_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_i32_fixed:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: li a1, 2
+; RV64-NEXT: bltu a0, a1, .LBB4_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a0, 2
+; RV64-NEXT: .LBB4_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 false)
+ ret i32 %a
+}
+
+define i32 @vector_length_XLen_fixed(iXLen zeroext %tc) {
+; RV32-LABEL: vector_length_XLen_fixed:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 2
+; RV32-NEXT: bltu a0, a1, .LBB5_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a0, 2
+; RV32-NEXT: .LBB5_2:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vector_length_XLen_fixed:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: li a1, 2
+; RV64-NEXT: bltu a0, a1, .LBB5_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a0, 2
+; RV64-NEXT: .LBB5_2:
+; RV64-NEXT: ret
+ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 false)
+ ret i32 %a
+}
diff --git a/llvm/test/Verifier/get_vector_length.ll b/llvm/test/Verifier/get_vector_length.ll
new file mode 100644
index 0000000000000..2fb2e089cd69f
--- /dev/null
+++ b/llvm/test/Verifier/get_vector_length.ll
@@ -0,0 +1,17 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i1)
+
+define i32 @vector_length_negative_vf(i32 zeroext %tc) {
+ ; CHECK: get_vector_length: VF must be positive
+ ; CHECK-NEXT: %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 -1, i1 true)
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 -1, i1 true)
+ ret i32 %a
+}
+
+define i32 @vector_length_zero_vf(i32 zeroext %tc) {
+ ; CHECK: get_vector_length: VF must be positive
+ ; CHECK-NEXT: %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 0, i1 true)
+ %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 0, i1 true)
+ ret i32 %a
+}
More information about the llvm-commits
mailing list