[llvm] r219726 - [FastISel][AArch64] Add custom lowering for GEPs.

Tue Oct 14 14:41:23 PDT 2014

Author: ributzka
Date: Tue Oct 14 16:41:23 2014
New Revision: 219726

URL: http://llvm.org/viewvc/llvm-project?rev=219726&view=rev
Log:
[FastISel][AArch64] Add custom lowering for GEPs.

This is mostly a copy of the existing FastISel GEP code, but on AArch64 we bail
out even for simple cases, because the standard fastEmit functions don't cover
MUL and ADD is lowered inefficiently.

Added:
    llvm/trunk/test/CodeGen/AArch64/fast-isel-gep.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=219726&r1=219725&r2=219726&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Tue Oct 14 16:41:23 2014
@@ -134,6 +134,7 @@ private:
   bool selectBitCast(const Instruction *I);
   bool selectFRem(const Instruction *I);
   bool selectSDiv(const Instruction *I);
+  bool selectGetElementPtr(const Instruction *I);
 
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
@@ -4541,6 +4542,88 @@ bool AArch64FastISel::selectSDiv(const I
   return true;
 }
 
+/// Lower a GEP: coalesce constant offsets into one add and add each variable
+/// index scaled by its element size. Returns false if a step cannot be emitted.
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+  unsigned N = getRegForValue(I->getOperand(0));
+  if (!N)
+    return false;
+  bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+  // into a single N = N + TotalOffset.
+  uint64_t TotalOffs = 0;
+  Type *Ty = I->getOperand(0)->getType();
+  MVT VT = TLI.getPointerTy();
+
+  // Fold the pending TotalOffs into N. The sum goes to a temporary so that N
+  // is still the base register for the register-register add fallback.
+  auto FlushTotalOffs = [&]() -> bool {
+    if (!TotalOffs)
+      return true;
+    unsigned Sum = emitAddSub_ri(/*UseAdd=*/true, VT, N, NIsKill, TotalOffs);
+    if (!Sum) {
+      unsigned C = fastEmit_i(VT, VT, ISD::Constant, TotalOffs);
+      if (!C)
+        return false;
+      Sum = emitAddSub_rr(/*UseAdd=*/true, VT, N, NIsKill, C, true);
+      if (!Sum)
+        return false;
+    }
+    N = Sum;
+    NIsKill = true;
+    TotalOffs = 0;
+    return true;
+  };
+
+  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (auto *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      // N = N + Offset
+      if (Field)
+        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+      // If this is a constant subscript, just accumulate it: N = N + Offset.
+      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (!CI->isZero())
+          TotalOffs += DL.getTypeAllocSize(Ty) * CI->getSExtValue();
+        continue;
+      }
+      if (!FlushTotalOffs())
+        return false;
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+      unsigned IdxN = Pair.first;
+      bool IdxNIsKill = Pair.second;
+      if (!IdxN)
+        return false;
+
+      if (ElementSize != 1) {
+        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+        if (!C)
+          return false;
+        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+        if (!IdxN)
+          return false;
+        IdxNIsKill = true;
+      }
+      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+      if (!N)
+        return false;
+    }
+  }
+  if (!FlushTotalOffs())
+    return false;
+
+  updateValueMap(I, N);
+  return true;
+}
+
 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   switch (I->getOpcode()) {
   default:
@@ -4612,6 +4695,8 @@ bool AArch64FastISel::fastSelectInstruct
     return selectRet(I);
   case Instruction::FRem:
     return selectFRem(I);
+  case Instruction::GetElementPtr:
+    return selectGetElementPtr(I);
   }
 
   // fall-back to target-independent instruction selection.

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll?rev=219726&r1=219725&r2=219726&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll Tue Oct 14 16:41:23 2014
@@ -15,9 +15,8 @@ define void @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: mov x29, sp
-; CHECK: mov x[[REG:[0-9]+]], sp
-; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
-; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
+; CHECK: mov [[REG:x[0-9]+]], sp
+; CHECK-NEXT: add x0, [[REG]], #8
   %E = alloca %struct.S2Ty, align 4
   %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
   call void @takeS1(%struct.S1Ty* %B)

Added: llvm/trunk/test/CodeGen/AArch64/fast-isel-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-gep.ll?rev=219726&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-gep.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-gep.ll Tue Oct 14 16:41:23 2014
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+%struct.foo = type { i32, i64, float, double }
+
+define double* @test_struct(%struct.foo* %f) {
+; CHECK-LABEL: test_struct
+; CHECK:       add x0, x0, #24
+  %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
+  ret double* %1
+}
+
+define i32* @test_array(i32* %a, i64 %i) {
+; CHECK-LABEL: test_array
+; CHECK:       orr [[REG:x[0-9]+]], xzr, #0x4
+; CHECK-NEXT:  madd  x0, x1, [[REG]], x0
+  %1 = getelementptr inbounds i32* %a, i64 %i
+  ret i32* %1
+}