[llvm] c767cf2 - [SVE] Add support for lowering GEPs involving scalable vectors.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 13:46:06 PST 2020
Author: Eli Friedman
Date: 2020-02-20T13:45:41-08:00
New Revision: c767cf24e48d9f6c17179abf2e42497601c7165b
URL: https://github.com/llvm/llvm-project/commit/c767cf24e48d9f6c17179abf2e42497601c7165b
DIFF: https://github.com/llvm/llvm-project/commit/c767cf24e48d9f6c17179abf2e42497601c7165b.diff
LOG: [SVE] Add support for lowering GEPs involving scalable vectors.
This includes both GEPs where the indexed type is a scalable vector and
GEPs where the result type is a scalable vector of pointers.
Differential Revision: https://reviews.llvm.org/D73602
Added:
llvm/test/CodeGen/AArch64/sve-gep.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Removed:
################################################################################
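For reference, the two GEP shapes this patch handles look like the
following IR. This is a hypothetical sketch (the function names are
invented here); the added test file below exercises these shapes and more:

; A GEP whose indexed type is a scalable vector: the element size is a
; runtime multiple of vscale, so the byte offset must be scaled by vscale.
define <vscale x 2 x i64>* @gep_scalable_indexed_type(<vscale x 2 x i64>* %base, i64 %i) {
  %p = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 %i
  ret <vscale x 2 x i64>* %p
}

; A GEP whose result type is a scalable vector of pointers: the scalar
; base pointer is splatted across a scalable vector before the arithmetic.
define <vscale x 2 x i8*> @gep_scalable_result_type(i8* %base, <vscale x 2 x i64> %idx) {
  %p = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
  ret <vscale x 2 x i8*> %p
}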
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e3c2139ef347..7cf3e43088f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3872,13 +3872,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
- unsigned VectorWidth = I.getType()->isVectorTy() ?
- I.getType()->getVectorNumElements() : 0;
+ bool IsVectorGEP = I.getType()->isVectorTy();
+ ElementCount VectorElementCount = IsVectorGEP ?
+ I.getType()->getVectorElementCount() : ElementCount(0, false);
- if (VectorWidth && !N.getValueType().isVector()) {
+ if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
- EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- N = DAG.getSplatBuildVector(VT, dl, N);
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+ if (VectorElementCount.Scalable)
+ N = DAG.getSplatVector(VT, dl, N);
+ else
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3900,9 +3904,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
+ // IdxSize is the width of the arithmetic according to IR semantics.
+ // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+ // (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ // We intentionally mask away the high bits here; ElementSize may not
+ // fit in IdxTy.
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3910,14 +3921,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
- if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
- if (CI->isZero())
- continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+ const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (CI && CI->isZero())
+ continue;
+ if (CI && !ElementScalable) {
+ APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
- SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, IdxTy);
+ SDValue OffsVal;
+ if (IsVectorGEP)
+ OffsVal = DAG.getConstant(
+ Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+ else
+ OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3931,31 +3946,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
continue;
}
- // N = N + Idx * ElementSize;
+ // N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
- if (!IdxN.getValueType().isVector() && VectorWidth) {
- EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+ VectorElementCount);
+ if (VectorElementCount.Scalable)
+ IdxN = DAG.getSplatVector(VT, dl, IdxN);
+ else
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
- // If this is a multiply by a power of two, turn it into a shl
- // immediately. This is a very common case.
- if (ElementSize != 1) {
- if (ElementSize.isPowerOf2()) {
- unsigned Amt = ElementSize.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl,
- N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()));
- } else {
- SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
- IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, dl,
- N.getValueType(), IdxN, Scale);
+ if (ElementScalable) {
+ EVT VScaleTy = N.getValueType().getScalarType();
+ SDValue VScale = DAG.getNode(
+ ISD::VSCALE, dl, VScaleTy,
+ DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+ if (IsVectorGEP)
+ VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ } else {
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementMul != 1) {
+ if (ElementMul.isPowerOf2()) {
+ unsigned Amt = ElementMul.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
}
}
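In the scalable path above, the element size is only known at compile
time as a multiple of vscale, so the index is scaled by an ISD::VSCALE
node instead of a constant (the shl/mul-by-constant fast path remains
for fixed sizes only). Expressed as scalar IR, the byte offset computed
for a scalable indexed type is roughly the following. This is a sketch,
not the builder's literal output; it uses the llvm.vscale intrinsic and
16, the known-min size in bytes of <vscale x 2 x i64>:

define i64 @scalable_byte_offset(i64 %index) {
  ; sizeof(<vscale x 2 x i64>) == vscale * 16 bytes at runtime.
  %vscale = call i64 @llvm.vscale.i64()
  %eltsize = mul i64 %vscale, 16
  %offset = mul i64 %index, %eltsize
  ret i64 %offset
}

declare i64 @llvm.vscale.i64()

On SVE, the rdvl/cnth instructions in the tests below materialize
exactly this vscale-scaled element size.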
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
index 000000000000..a798913bfde4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64>* @scalar_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalar_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: add x0, x0, x8, lsl #2
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 4
+ ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i64>* @scalar_of_scalable_2(<vscale x 2 x i64>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: madd x0, x1, x8, x0
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 %offset
+ ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i32>* @scalar_of_scalable_3(<vscale x 2 x i32>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8
+; CHECK-NEXT: madd x0, x1, x8, x0
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 %offset
+ ret <vscale x 2 x i32>* %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: dup v0.2d, x8
+; CHECK-NEXT: dup v1.2d, x0
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+ ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+ ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base, <vscale x 2 x i64> %idx) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_4(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: mul z1.d, z1.d, #1
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mul z1.d, z1.d, #1
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_3(<vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_scalable_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
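A note on the %idx idiom in the tests above: scalable vectors have no
literal constant syntax, so a constant splat is written as an
insertelement into lane 0 followed by a shufflevector with a zero mask,
which broadcasts lane 0 to every element. A minimal sketch (hypothetical
function name) producing a splat of 1:

define <vscale x 2 x i64> @splat_of_one() {
  ; Insert 1 into lane 0, then broadcast lane 0 across all lanes.
  %ins = insertelement <vscale x 2 x i64> undef, i64 1, i32 0
  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}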