[llvm] r215597 - [FastISel][AArch64] Add support for more addressing modes.

Juergen Ributzka juergen at apple.com
Wed Aug 13 15:53:29 PDT 2014


Author: ributzka
Date: Wed Aug 13 17:53:29 2014
New Revision: 215597

URL: http://llvm.org/viewvc/llvm-project?rev=215597&view=rev
Log:
[FastISel][AArch64] Add support for more addressing modes.

FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes, so that shifts and sign-/zero-extensions can be folded into
the memory operation itself.

For example:
  lsl x1, x1, #3     --> ldr x0, [x0, x1, lsl #3]
  ldr x0, [x0, x1]

  sxtw x1, w1
  lsl x1, x1, #3     --> ldr x0, [x0, x1, sxtw #3]
  ldr x0, [x0, x1]
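
As a concrete illustration, the sign-extend case corresponds to IR like the
following (a minimal sketch modeled on load_breg_sext_shift_offreg_1 in the new
test file; the function name is illustrative, and the annotated ldr form mirrors
the CHECK lines added below):

  define i32 @sext_shift_fold(i32 %idx, i64 %base) {
    %1 = sext i32 %idx to i64
    %2 = shl i64 %1, 2
    %3 = add i64 %2, %base
    %4 = inttoptr i64 %3 to i32*
    %5 = load i32* %4   ; now selects to: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
    ret i32 %5
  }

Before this change FastISel would have emitted the sxtw, lsl, and add as
separate instructions followed by a plain base-register load; now the whole
pattern folds into a single register-offset load.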

Added:
    llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=215597&r1=215596&r2=215597&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Aug 13 17:53:29 2014
@@ -41,7 +41,6 @@ using namespace llvm;
 namespace {
 
 class AArch64FastISel : public FastISel {
-
   class Address {
   public:
     typedef enum {
@@ -51,17 +50,23 @@ class AArch64FastISel : public FastISel
 
   private:
     BaseKind Kind;
+    AArch64_AM::ShiftExtendType ExtType;
     union {
       unsigned Reg;
       int FI;
     } Base;
+    unsigned OffsetReg;
+    unsigned Shift;
     int64_t Offset;
     const GlobalValue *GV;
 
   public:
-    Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
+    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
+      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
     void setKind(BaseKind K) { Kind = K; }
     BaseKind getKind() const { return Kind; }
+    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
+    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     bool isRegBase() const { return Kind == RegBase; }
     bool isFIBase() const { return Kind == FrameIndexBase; }
     void setReg(unsigned Reg) {
@@ -72,6 +77,14 @@ class AArch64FastISel : public FastISel
       assert(isRegBase() && "Invalid base register access!");
       return Base.Reg;
     }
+    void setOffsetReg(unsigned Reg) {
+      assert(isRegBase() && "Invalid offset register access!");
+      OffsetReg = Reg;
+    }
+    unsigned getOffsetReg() const {
+      assert(isRegBase() && "Invalid offset register access!");
+      return OffsetReg;
+    }
     void setFI(unsigned FI) {
       assert(isFIBase() && "Invalid base frame index  access!");
       Base.FI = FI;
@@ -82,11 +95,11 @@ class AArch64FastISel : public FastISel
     }
     void setOffset(int64_t O) { Offset = O; }
     int64_t getOffset() { return Offset; }
+    void setShift(unsigned S) { Shift = S; }
+    unsigned getShift() { return Shift; }
 
     void setGlobalValue(const GlobalValue *G) { GV = G; }
     const GlobalValue *getGlobalValue() { return GV; }
-
-    bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
   };
 
   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
@@ -121,13 +134,12 @@ private:
   // Utility helper routines.
   bool isTypeLegal(Type *Ty, MVT &VT);
   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
-  bool ComputeAddress(const Value *Obj, Address &Addr);
+  bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
   bool ComputeCallAddress(const Value *V, Address &Addr);
-  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
-                       bool UseUnscaled);
+  bool SimplifyAddress(Address &Addr, MVT VT);
   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
-                            unsigned Flags, MachineMemOperand *MMO,
-                            bool UseUnscaled);
+                            unsigned Flags, unsigned ScaleFactor,
+                            MachineMemOperand *MMO);
   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                           unsigned Alignment);
@@ -137,9 +149,9 @@ private:
   // Emit functions.
   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
+                MachineMemOperand *MMO = nullptr);
   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
-                 MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
+                 MachineMemOperand *MMO = nullptr);
   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -337,7 +349,8 @@ unsigned AArch64FastISel::TargetMaterial
 }
 
 // Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
   const User *U = nullptr;
   unsigned Opcode = Instruction::UserOp1;
   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -364,18 +377,18 @@ bool AArch64FastISel::ComputeAddress(con
     break;
   case Instruction::BitCast: {
     // Look through bitcasts.
-    return ComputeAddress(U->getOperand(0), Addr);
+    return ComputeAddress(U->getOperand(0), Addr, Ty);
   }
   case Instruction::IntToPtr: {
     // Look past no-op inttoptrs.
     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::PtrToInt: {
     // Look past no-op ptrtoints.
     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(U->getOperand(0), Addr, Ty);
     break;
   }
   case Instruction::GetElementPtr: {
@@ -417,7 +430,7 @@ bool AArch64FastISel::ComputeAddress(con
 
     // Try to grab the base operand now.
     Addr.setOffset(TmpOffset);
-    if (ComputeAddress(U->getOperand(0), Addr))
+    if (ComputeAddress(U->getOperand(0), Addr, Ty))
       return true;
 
     // We failed, restore everything and try the other options.
@@ -437,19 +450,86 @@ bool AArch64FastISel::ComputeAddress(con
     }
     break;
   }
-  case Instruction::Add:
+  case Instruction::Add: {
     // Adds of constants are common and easy enough.
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+    const Value *LHS = U->getOperand(0);
+    const Value *RHS = U->getOperand(1);
+
+    if (isa<ConstantInt>(LHS))
+      std::swap(LHS, RHS);
+
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
-      return ComputeAddress(U->getOperand(0), Addr);
+      return ComputeAddress(LHS, Addr, Ty);
     }
+
+    Address Backup = Addr;
+    if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
+      return true;
+    Addr = Backup;
+
     break;
   }
+  case Instruction::Shl:
+    if (Addr.getOffsetReg())
+      break;
+
+    if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      unsigned Val = CI->getZExtValue();
+      if (Val < 1 || Val > 3)
+        break;
+
+      uint64_t NumBytes = 0;
+      if (Ty && Ty->isSized()) {
+        uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+        NumBytes = NumBits / 8;
+        if (!isPowerOf2_64(NumBits))
+          NumBytes = 0;
+      }
+
+      if (NumBytes != (1 << Val))
+        break;
 
-  // Try to get this in a register if nothing else has worked.
-  if (!Addr.isValid())
-    Addr.setReg(getRegForValue(Obj));
-  return Addr.isValid();
+      Addr.setShift(Val);
+      Addr.setExtendType(AArch64_AM::LSL);
+
+      if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
+        if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+          U = I;
+
+      if (const auto *ZE = dyn_cast<ZExtInst>(U))
+        if (ZE->getOperand(0)->getType()->isIntegerTy(32))
+          Addr.setExtendType(AArch64_AM::UXTW);
+
+      if (const auto *SE = dyn_cast<SExtInst>(U))
+        if (SE->getOperand(0)->getType()->isIntegerTy(32))
+          Addr.setExtendType(AArch64_AM::SXTW);
+
+      unsigned Reg = getRegForValue(U->getOperand(0));
+      if (!Reg)
+        return false;
+      Addr.setOffsetReg(Reg);
+      return true;
+    }
+    break;
+  }
+
+  if (Addr.getReg()) {
+    if (!Addr.getOffsetReg()) {
+      unsigned Reg = getRegForValue(Obj);
+      if (!Reg)
+        return false;
+      Addr.setOffsetReg(Reg);
+      return true;
+    }
+    return false;
+  }
+
+  unsigned Reg = getRegForValue(Obj);
+  if (!Reg)
+    return false;
+  Addr.setReg(Reg);
+  return true;
 }
 
 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
@@ -531,50 +611,80 @@ bool AArch64FastISel::isLoadStoreTypeLeg
   return false;
 }
 
-bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
-                                      int64_t ScaleFactor, bool UseUnscaled) {
-  bool needsLowering = false;
-  int64_t Offset = Addr.getOffset();
+bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
+  unsigned ScaleFactor;
   switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32:
-  case MVT::i64:
-  case MVT::f32:
-  case MVT::f64:
-    if (!UseUnscaled)
-      // Using scaled, 12-bit, unsigned immediate offsets.
-      needsLowering = ((Offset & 0xfff) != Offset);
-    else
-      // Using unscaled, 9-bit, signed immediate offsets.
-      needsLowering = (Offset > 256 || Offset < -256);
-    break;
+  default: return false;
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
   }
 
-  //If this is a stack pointer and the offset needs to be simplified then put
+  bool ImmediateOffsetNeedsLowering = false;
+  bool RegisterOffsetNeedsLowering = false;
+  int64_t Offset = Addr.getOffset();
+  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
+    ImmediateOffsetNeedsLowering = true;
+  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
+           !isUInt<12>(Offset / ScaleFactor))
+    ImmediateOffsetNeedsLowering = true;
+
+  // Cannot encode an offset register and an immediate offset in the same
+  // instruction. Fold the immediate offset into the load/store instruction and
+  // emit an additional add to take care of the offset register.
+  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
+      Addr.getOffsetReg())
+    RegisterOffsetNeedsLowering = true;
+
+  // If this is a stack pointer and the offset needs to be simplified then put
   // the alloca address into a register, set the base type back to register and
   // continue. This should almost never happen.
-  if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
+  if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
             ResultReg)
-        .addFrameIndex(Addr.getFI())
-        .addImm(0)
-        .addImm(0);
+      .addFrameIndex(Addr.getFI())
+      .addImm(0)
+      .addImm(0);
     Addr.setKind(Address::RegBase);
     Addr.setReg(ResultReg);
   }
 
+  if (RegisterOffsetNeedsLowering) {
+    unsigned ResultReg = 0;
+    if (Addr.getReg()) {
+      ResultReg = createResultReg(&AArch64::GPR64RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::ADDXrs), ResultReg)
+        .addReg(Addr.getReg())
+        .addReg(Addr.getOffsetReg())
+        .addImm(Addr.getShift());
+    } else
+      ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
+                              /*Op0IsKill=*/false, Addr.getShift());
+    if (!ResultReg)
+      return false;
+
+    Addr.setReg(ResultReg);
+    Addr.setOffsetReg(0);
+    Addr.setShift(0);
+  }
+
   // Since the offset is too large for the load/store instruction get the
   // reg+offset into a register.
-  if (needsLowering) {
-    uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
-    unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
-                                      UnscaledOffset, MVT::i64);
-    if (ResultReg == 0)
+  if (ImmediateOffsetNeedsLowering) {
+    unsigned ResultReg = 0;
+    if (Addr.getReg())
+      ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
+                               /*IsKill=*/false, Offset, MVT::i64);
+    else
+      ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
+
+    if (!ResultReg)
       return false;
     Addr.setReg(ResultReg);
     Addr.setOffset(0);
@@ -585,11 +695,11 @@ bool AArch64FastISel::SimplifyAddress(Ad
 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
                                            const MachineInstrBuilder &MIB,
                                            unsigned Flags,
-                                           MachineMemOperand *MMO,
-                                           bool UseUnscaled) {
-  int64_t Offset = Addr.getOffset();
+                                           unsigned ScaleFactor,
+                                           MachineMemOperand *MMO) {
+  int64_t Offset = Addr.getOffset() / ScaleFactor;
   // Frame base works a bit differently. Handle it separately.
-  if (Addr.getKind() == Address::FrameIndexBase) {
+  if (Addr.isFIBase()) {
     int FI = Addr.getFI();
     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
     // and alignment should be based on the VT.
@@ -599,9 +709,19 @@ void AArch64FastISel::AddLoadStoreOperan
     // Now add the rest of the operands.
     MIB.addFrameIndex(FI).addImm(Offset);
   } else {
-    // Now add the rest of the operands.
-    MIB.addReg(Addr.getReg());
-    MIB.addImm(Offset);
+    assert(Addr.isRegBase() && "Unexpected address kind.");
+    if (Addr.getOffsetReg()) {
+      assert(Addr.getOffset() == 0 && "Unexpected offset");
+      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
+                      Addr.getExtendType() == AArch64_AM::SXTX;
+      MIB.addReg(Addr.getReg());
+      MIB.addReg(Addr.getOffsetReg());
+      MIB.addImm(IsSigned);
+      MIB.addImm(Addr.getShift() != 0);
+    } else {
+      MIB.addReg(Addr.getReg());
+      MIB.addImm(Offset);
+    }
   }
 
   if (MMO)
@@ -609,72 +729,68 @@ void AArch64FastISel::AddLoadStoreOperan
 }
 
 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                               MachineMemOperand *MMO, bool UseUnscaled) {
+                               MachineMemOperand *MMO) {
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT))
+    return false;
+
+  unsigned ScaleFactor;
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
+  }
+
   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
-  if (!UseUnscaled && Addr.getOffset() < 0)
-    UseUnscaled = true;
+  bool UseScaled = true;
+  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+    UseScaled = false;
+    ScaleFactor = 1;
+  }
+
+  static const unsigned OpcTable[4][6] = {
+    { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
+      AArch64::LDURSi,   AArch64::LDURDi },
+    { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
+      AArch64::LDRSui,   AArch64::LDRDui },
+    { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
+      AArch64::LDRSroX,  AArch64::LDRDroX },
+    { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
+      AArch64::LDRSroW,  AArch64::LDRDroW }
+  };
 
   unsigned Opc;
   const TargetRegisterClass *RC;
   bool VTIsi1 = false;
-  int64_t ScaleFactor = 0;
-  switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-    VTIsi1 = true;
-  // Intentional fall-through.
-  case MVT::i8:
-    Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 1;
-    break;
-  case MVT::i16:
-    Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 2;
-    break;
-  case MVT::i32:
-    Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
-    RC = &AArch64::GPR32RegClass;
-    ScaleFactor = 4;
-    break;
-  case MVT::i64:
-    Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
-    RC = &AArch64::GPR64RegClass;
-    ScaleFactor = 8;
-    break;
-  case MVT::f32:
-    Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
-    RC = TLI.getRegClassFor(VT);
-    ScaleFactor = 4;
-    break;
-  case MVT::f64:
-    Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
-    RC = TLI.getRegClassFor(VT);
-    ScaleFactor = 8;
-    break;
-  }
-  // Scale the offset.
-  if (!UseUnscaled) {
-    int64_t Offset = Addr.getOffset();
-    if (Offset & (ScaleFactor - 1))
-      // Retry using an unscaled, 9-bit, signed immediate offset.
-      return EmitLoad(VT, ResultReg, Addr, MMO, /*UseUnscaled*/ true);
+  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+                      Addr.getOffsetReg();
+  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+  if (Addr.getExtendType() == AArch64_AM::UXTW ||
+      Addr.getExtendType() == AArch64_AM::SXTW)
+    Idx++;
 
-    Addr.setOffset(Offset / ScaleFactor);
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
+  case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
+  case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
+  case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
+  case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
   }
 
-  // Simplify this down to something we can handle.
-  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
-    return false;
-
   // Create the base instruction, then add the operands.
   ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                     TII.get(Opc), ResultReg);
-  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, MMO, UseUnscaled);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
 
   // Loading an i1 requires special handling.
   if (VTIsi1) {
@@ -682,8 +798,8 @@ bool AArch64FastISel::EmitLoad(MVT VT, u
     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
             ANDReg)
-        .addReg(ResultReg)
-        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+      .addReg(ResultReg)
+      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     ResultReg = ANDReg;
   }
   return true;
@@ -699,7 +815,7 @@ bool AArch64FastISel::SelectLoad(const I
 
   // See if we can handle this address.
   Address Addr;
-  if (!ComputeAddress(I->getOperand(0), Addr))
+  if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
   unsigned ResultReg;
@@ -711,59 +827,63 @@ bool AArch64FastISel::SelectLoad(const I
 }
 
 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
-                                MachineMemOperand *MMO, bool UseUnscaled) {
-  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
-  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
-  if (!UseUnscaled && Addr.getOffset() < 0)
-    UseUnscaled = true;
+                                MachineMemOperand *MMO) {
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT))
+    return false;
 
-  unsigned StrOpc;
-  bool VTIsi1 = false;
-  int64_t ScaleFactor = 0;
-  // Using scaled, 12-bit, unsigned immediate offsets.
+  unsigned ScaleFactor;
   switch (VT.SimpleTy) {
-  default:
-    return false;
-  case MVT::i1:
-    VTIsi1 = true;
-  case MVT::i8:
-    StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
-    ScaleFactor = 1;
-    break;
-  case MVT::i16:
-    StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
-    ScaleFactor = 2;
-    break;
-  case MVT::i32:
-    StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
-    ScaleFactor = 4;
-    break;
-  case MVT::i64:
-    StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
-    ScaleFactor = 8;
-    break;
-  case MVT::f32:
-    StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
-    ScaleFactor = 4;
-    break;
-  case MVT::f64:
-    StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
-    ScaleFactor = 8;
-    break;
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  // fall-through
+  case MVT::i8:  ScaleFactor = 1; break;
+  case MVT::i16: ScaleFactor = 2; break;
+  case MVT::i32: // fall-through
+  case MVT::f32: ScaleFactor = 4; break;
+  case MVT::i64: // fall-through
+  case MVT::f64: ScaleFactor = 8; break;
   }
-  // Scale the offset.
-  if (!UseUnscaled) {
-    int64_t Offset = Addr.getOffset();
-    if (Offset & (ScaleFactor - 1))
-      // Retry using an unscaled, 9-bit, signed immediate offset.
-      return EmitStore(VT, SrcReg, Addr, MMO, /*UseUnscaled*/ true);
 
-    Addr.setOffset(Offset / ScaleFactor);
+  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+  bool UseScaled = true;
+  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+    UseScaled = false;
+    ScaleFactor = 1;
   }
 
-  // Simplify this down to something we can handle.
-  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
-    return false;
+
+  static const unsigned OpcTable[4][6] = {
+    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
+      AArch64::STURSi,   AArch64::STURDi },
+    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
+      AArch64::STRSui,   AArch64::STRDui },
+    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
+      AArch64::STRSroX,  AArch64::STRDroX },
+    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
+      AArch64::STRSroW,  AArch64::STRDroW }
+
+  };
+
+  unsigned Opc;
+  bool VTIsi1 = false;
+  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+                      Addr.getOffsetReg();
+  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+  if (Addr.getExtendType() == AArch64_AM::UXTW ||
+      Addr.getExtendType() == AArch64_AM::SXTW)
+    Idx++;
+
+  switch (VT.SimpleTy) {
+  default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1:  VTIsi1 = true;
+  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
+  case MVT::i16: Opc = OpcTable[Idx][1]; break;
+  case MVT::i32: Opc = OpcTable[Idx][2]; break;
+  case MVT::i64: Opc = OpcTable[Idx][3]; break;
+  case MVT::f32: Opc = OpcTable[Idx][4]; break;
+  case MVT::f64: Opc = OpcTable[Idx][5]; break;
+  }
 
   // Storing an i1 requires special handling.
   if (VTIsi1) {
@@ -771,14 +891,15 @@ bool AArch64FastISel::EmitStore(MVT VT,
     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
             ANDReg)
-        .addReg(SrcReg)
-        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+      .addReg(SrcReg)
+      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     SrcReg = ANDReg;
   }
   // Create the base instruction, then add the operands.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                                    TII.get(StrOpc)).addReg(SrcReg);
-  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, MMO, UseUnscaled);
+                                    TII.get(Opc))
+                              .addReg(SrcReg);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
 
   return true;
 }
@@ -800,7 +921,7 @@ bool AArch64FastISel::SelectStore(const
 
   // See if we can handle this address.
   Address Addr;
-  if (!ComputeAddress(I->getOperand(1), Addr))
+  if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
     return false;
 
   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))

Added: llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll?rev=215597&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-addressing-modes.ll Wed Aug 13 17:53:29 2014
@@ -0,0 +1,425 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                             < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+
+; Load / Store Base Register only
+define zeroext i1 @load_breg_i1(i1* %a) {
+; CHECK-LABEL: load_breg_i1
+; CHECK:       ldrb {{w[0-9]+}}, [x0]
+  %1 = load i1* %a
+  ret i1 %1
+}
+
+define zeroext i8 @load_breg_i8(i8* %a) {
+; CHECK-LABEL: load_breg_i8
+; CHECK:       ldrb {{w[0-9]+}}, [x0]
+  %1 = load i8* %a
+  ret i8 %1
+}
+
+define zeroext i16 @load_breg_i16(i16* %a) {
+; CHECK-LABEL: load_breg_i16
+; CHECK:       ldrh {{w[0-9]+}}, [x0]
+  %1 = load i16* %a
+  ret i16 %1
+}
+
+define i32 @load_breg_i32(i32* %a) {
+; CHECK-LABEL: load_breg_i32
+; CHECK:       ldr {{w[0-9]+}}, [x0]
+  %1 = load i32* %a
+  ret i32 %1
+}
+
+define i64 @load_breg_i64(i64* %a) {
+; CHECK-LABEL: load_breg_i64
+; CHECK:       ldr {{x[0-9]+}}, [x0]
+  %1 = load i64* %a
+  ret i64 %1
+}
+
+define float @load_breg_f32(float* %a) {
+; CHECK-LABEL: load_breg_f32
+; CHECK:       ldr {{s[0-9]+}}, [x0]
+  %1 = load float* %a
+  ret float %1
+}
+
+define double @load_breg_f64(double* %a) {
+; CHECK-LABEL: load_breg_f64
+; CHECK:       ldr {{d[0-9]+}}, [x0]
+  %1 = load double* %a
+  ret double %1
+}
+
+define void @store_breg_i1(i1* %a) {
+; CHECK-LABEL: store_breg_i1
+; CHECK:       strb {{wzr|w[0-9]+}}, [x0]
+  store i1 0, i1* %a
+  ret void
+}
+
+define void @store_breg_i8(i8* %a) {
+; CHECK-LABEL: store_breg_i8
+; CHECK:       strb wzr, [x0]
+  store i8 0, i8* %a
+  ret void
+}
+
+define void @store_breg_i16(i16* %a) {
+; CHECK-LABEL: store_breg_i16
+; CHECK:       strh wzr, [x0]
+  store i16 0, i16* %a
+  ret void
+}
+
+define void @store_breg_i32(i32* %a) {
+; CHECK-LABEL: store_breg_i32
+; CHECK:       str wzr, [x0]
+  store i32 0, i32* %a
+  ret void
+}
+
+define void @store_breg_i64(i64* %a) {
+; CHECK-LABEL: store_breg_i64
+; CHECK:       str xzr, [x0]
+  store i64 0, i64* %a
+  ret void
+}
+
+define void @store_breg_f32(float* %a) {
+; CHECK-LABEL: store_breg_f32
+; CHECK:       str {{wzr|s[0-9]+}}, [x0]
+  store float 0.0, float* %a
+  ret void
+}
+
+define void @store_breg_f64(double* %a) {
+; CHECK-LABEL: store_breg_f64
+; CHECK:       str {{xzr|d[0-9]+}}, [x0]
+  store double 0.0, double* %a
+  ret void
+}
+
+; Load / Store Base Register + Immediate Offset
+; Max supported negative offset
+define i32 @load_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_1
+; CHECK:       ldur {{w[0-9]+}}, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min not-supported negative offset
+define i32 @load_breg_immoff_2(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_2
+; SDAG:       sub [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_2
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported unscaled offset
+define i32 @load_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_3
+; CHECK:       ldur {{w[0-9]+}}, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported unscaled offset
+define i32 @load_breg_immoff_4(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_4
+; SDAG:       add [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_4
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported scaled offset
+define i32 @load_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_5
+; CHECK:       ldr {{w[0-9]+}}, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported scaled offset
+define i32 @load_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_6
+; SDAG:       add [[REG:x[0-9]+]], x0, #4, lsl #12
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: load_breg_immoff_6
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported negative offset
+define void @store_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_1
+; CHECK:       stur wzr, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min not-supported negative offset
+define void @store_breg_immoff_2(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_2
+; SDAG:       sub [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_2
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported unscaled offset
+define void @store_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_3
+; CHECK:       stur wzr, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported unscaled offset
+define void @store_breg_immoff_4(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_4
+; SDAG:       add [[REG:x[0-9]+]], x0, #257
+; SDAG-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_4
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported scaled offset
+define void @store_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_5
+; CHECK:       str wzr, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported scaled offset
+define void @store_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_6
+; SDAG:       add [[REG:x[0-9]+]], x0, #4, lsl #12
+; SDAG-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+; FAST-LABEL: store_breg_immoff_6
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+define i64 @load_breg_immoff_7(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_7
+; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 %a, 48
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_immoff_8(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_8
+; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 48, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset
+define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_1
+; CHECK:       ldr {{x[0-9]+}}, [x0, x1]
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_2
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0]
+  %1 = add i64 %b, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset + Immediate Offset
+define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_immoff_1
+; CHECK:       add [[REG:x[0-9]+]], x0, x1
+; CHECK-NEXT:  ldr x0, {{\[}}[[REG]], #48{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 48
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_offreg_immoff_2
+; SDAG:       add [[REG1:x[0-9]+]], x0, x1
+; SDAG-NEXT:  add [[REG2:x[0-9]+]], [[REG1]], #15, lsl #12
+; SDAG-NEXT:  ldr x0, {{\[}}[[REG2]]{{\]}}
+; FAST-LABEL: load_breg_offreg_immoff_2
+; FAST:       add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  ldr x0, {{\[}}[[REG]], x1{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 61440
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+; Load Base Register + Scaled Register Offset
+define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %b, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_3(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_3
+; SDAG:       lsl [[REG:x[0-9]+]], x0, #2
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_3
+; FAST:       lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_4(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_4
+; SDAG:       lsl [[REG:x[0-9]+]], x1, #2
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_4
+; FAST:       lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %2, %1
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_5
+; SDAG:       lsl [[REG:x[0-9]+]], x1, #3
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_5
+; FAST:       lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 3
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+
+; Load Base Register + Scaled Register Offset + Sign/Zero extension
+define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+

More information about the llvm-commits mailing list