[llvm] c767cf2 - [SVE] Add support for lowering GEPs involving scalable vectors.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 13:46:06 PST 2020
Author: Eli Friedman
Date: 2020-02-20T13:45:41-08:00
New Revision: c767cf24e48d9f6c17179abf2e42497601c7165b
URL: https://github.com/llvm/llvm-project/commit/c767cf24e48d9f6c17179abf2e42497601c7165b
DIFF: https://github.com/llvm/llvm-project/commit/c767cf24e48d9f6c17179abf2e42497601c7165b.diff
LOG: [SVE] Add support for lowering GEPs involving scalable vectors.
This includes both GEPs where the indexed type is a scalable vector and
GEPs where the result type is a scalable vector of pointers.
Differential Revision: https://reviews.llvm.org/D73602
Added:
llvm/test/CodeGen/AArch64/sve-gep.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Removed:
################################################################################
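For reference, the two GEP shapes this patch handles look like the
following IR. This is a hypothetical sketch (the function names are
invented here); the added test file below exercises these shapes and more:

; A GEP whose indexed type is a scalable vector: the element size is a
; runtime multiple of vscale, so the byte offset must be scaled by vscale.
define <vscale x 2 x i64>* @gep_scalable_indexed_type(<vscale x 2 x i64>* %base, i64 %i) {
  %p = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 %i
  ret <vscale x 2 x i64>* %p
}

; A GEP whose result type is a scalable vector of pointers: the scalar
; base pointer is splatted across a scalable vector before the arithmetic.
define <vscale x 2 x i8*> @gep_scalable_result_type(i8* %base, <vscale x 2 x i64> %idx) {
  %p = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
  ret <vscale x 2 x i8*> %p
}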
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e3c2139ef347..7cf3e43088f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3872,13 +3872,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
- unsigned VectorWidth = I.getType()->isVectorTy() ?
- I.getType()->getVectorNumElements() : 0;
+ bool IsVectorGEP = I.getType()->isVectorTy();
+ ElementCount VectorElementCount = IsVectorGEP ?
+ I.getType()->getVectorElementCount() : ElementCount(0, false);
- if (VectorWidth && !N.getValueType().isVector()) {
+ if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
- EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- N = DAG.getSplatBuildVector(VT, dl, N);
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+ if (VectorElementCount.Scalable)
+ N = DAG.getSplatVector(VT, dl, N);
+ else
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3900,9 +3904,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
+ // IdxSize is the width of the arithmetic according to IR semantics.
+ // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+ // (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ // We intentionally mask away the high bits here; ElementSize may not
+ // fit in IdxTy.
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3910,14 +3921,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
- if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
- if (CI->isZero())
- continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+ const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (CI && CI->isZero())
+ continue;
+ if (CI && !ElementScalable) {
+ APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
- SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, IdxTy);
+ SDValue OffsVal;
+ if (IsVectorGEP)
+ OffsVal = DAG.getConstant(
+ Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+ else
+ OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3931,31 +3946,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
continue;
}
- // N = N + Idx * ElementSize;
+ // N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
- if (!IdxN.getValueType().isVector() && VectorWidth) {
- EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+ VectorElementCount);
+ if (VectorElementCount.Scalable)
+ IdxN = DAG.getSplatVector(VT, dl, IdxN);
+ else
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
- // If this is a multiply by a power of two, turn it into a shl
- // immediately. This is a very common case.
- if (ElementSize != 1) {
- if (ElementSize.isPowerOf2()) {
- unsigned Amt = ElementSize.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl,
- N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()));
- } else {
- SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
- IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, dl,
- N.getValueType(), IdxN, Scale);
+ if (ElementScalable) {
+ EVT VScaleTy = N.getValueType().getScalarType();
+ SDValue VScale = DAG.getNode(
+ ISD::VSCALE, dl, VScaleTy,
+ DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+ if (IsVectorGEP)
+ VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ } else {
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementMul != 1) {
+ if (ElementMul.isPowerOf2()) {
+ unsigned Amt = ElementMul.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
}
}
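In the scalable path above, the element size is only known at compile
time as a multiple of vscale, so the index is scaled by an ISD::VSCALE
node instead of a constant (the shl/mul-by-constant fast path remains
for fixed sizes only). Expressed as scalar IR, the byte offset computed
for a scalable indexed type is roughly the following. This is a sketch,
not the builder's literal output; it uses the llvm.vscale intrinsic and
16, the known-min size in bytes of <vscale x 2 x i64>:

define i64 @scalable_byte_offset(i64 %index) {
  ; sizeof(<vscale x 2 x i64>) == vscale * 16 bytes at runtime.
  %vscale = call i64 @llvm.vscale.i64()
  %eltsize = mul i64 %vscale, 16
  %offset = mul i64 %index, %eltsize
  ret i64 %offset
}

declare i64 @llvm.vscale.i64()

On SVE, the rdvl/cnth instructions in the tests below materialize
exactly this vscale-scaled element size.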
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
index 000000000000..a798913bfde4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64>* @scalar_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalar_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: add x0, x0, x8, lsl #2
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 4
+ ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i64>* @scalar_of_scalable_2(<vscale x 2 x i64>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: madd x0, x1, x8, x0
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 %offset
+ ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i32>* @scalar_of_scalable_3(<vscale x 2 x i32>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8
+; CHECK-NEXT: madd x0, x1, x8, x0
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 %offset
+ ret <vscale x 2 x i32>* %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: dup v0.2d, x8
+; CHECK-NEXT: dup v1.2d, x0
+; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+ ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+ ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base, <vscale x 2 x i64> %idx) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_4(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+ ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: mul z1.d, z1.d, #1
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mul z1.d, z1.d, #1
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_3(<vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_scalable_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx
+ ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
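A note on the %idx idiom in the tests above: scalable vectors have no
literal constant syntax, so a constant splat is written as an
insertelement into lane 0 followed by a shufflevector with a zero mask,
which broadcasts lane 0 to every element. A minimal sketch (hypothetical
function name) producing a splat of 1:

define <vscale x 2 x i64> @splat_of_one() {
  ; Insert 1 into lane 0, then broadcast lane 0 across all lanes.
  %ins = insertelement <vscale x 2 x i64> undef, i64 1, i32 0
  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}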