[llvm] r330630 - Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses.", with a fix for the bot failure.

Peter Collingbourne via llvm-commits <llvm-commits@lists.llvm.org>
Mon Apr 23 12:09:34 PDT 2018


Author: pcc
Date: Mon Apr 23 12:09:34 2018
New Revision: 330630

URL: http://llvm.org/viewvc/llvm-project?rev=330630&view=rev
Log:
Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses.", with a fix for the bot failure.

This reland includes a check to prevent the DAG combiner from folding an
offset that is smaller than the existing one. Folding a smaller offset can
make the combiner oscillate between two possible DAGs, which was the cause of
the hang, and later the assertion failure, observed on the
lnt-ctmark-aarch64-O3-flto bot.
http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/
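
To illustrate (informal SelectionDAG notation, restating the example from the
comment added to performGlobalAddressCombine below): without the check, the
combiner could keep rewriting between the two equivalent forms

  (add (add globaladdr + 10, -1), 1)   <->   (add globaladdr + 9, 1)

since each form can be turned back into the other. Requiring the folded
offset to be strictly larger than the node's existing offset forbids the
shrinking rewrite, so the DAG reaches a fixed point.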

Original commit message:
> This is a code size win in code that frequently takes offset
> addresses, such as C++ constructors, which typically need to compute
> the address at an offset into a vtable. This reduces the size of
> Chromium for Android's .text section by 108KB.
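
As a sketch of the effect (adapted from the f1 and f2 cases of the new
fold-global-offsets.ll test below, where x1 is a hidden global), a load from
a small foldable constant offset drops from three instructions to two,
because the offset moves into both the ADRP page and the :lo12: page-offset
relocations:

  without the fold:                with the fold:
    adrp x8, x1                      adrp x8, x1+16
    add  x8, x8, :lo12:x1            ldr  x0, [x8, :lo12:x1+16]
    ldr  x0, [x8, #16]

(The unfolded column is the shape f2 still produces when the combine rejects
an offset.) The same fold applies under the Mach-O @PAGE/@PAGEOFF scheme; in
the f4 case of the updated global-merge-ignore-single-use-minsize.ll test
([[SET]] is the FileCheck variable bound to the merged-globals symbol), the
separate base add disappears:

  before:                                  after:
    adrp x8, [[SET]]@PAGE                    adrp x8, [[SET]]@PAGE+8
    add  x8, x8, [[SET]]@PAGEOFF             str  w0, [x8, [[SET]]@PAGEOFF+8]
    str  w0, [x8, #8]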

Differential Revision: https://reviews.llvm.org/D45199

Added:
    llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll
    llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll
    llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
    llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp Mon Apr 23 12:09:34 2018
@@ -743,14 +743,16 @@ bool AArch64DAGToDAGISel::SelectAddrMode
     if (!GAN)
       return true;
 
-    const GlobalValue *GV = GAN->getGlobal();
-    unsigned Alignment = GV->getAlignment();
-    Type *Ty = GV->getValueType();
-    if (Alignment == 0 && Ty->isSized())
-      Alignment = DL.getABITypeAlignment(Ty);
+    if (GAN->getOffset() % Size == 0) {
+      const GlobalValue *GV = GAN->getGlobal();
+      unsigned Alignment = GV->getAlignment();
+      Type *Ty = GV->getValueType();
+      if (Alignment == 0 && Ty->isSized())
+        Alignment = DL.getABITypeAlignment(Ty);
 
-    if (Alignment >= Size)
-      return true;
+      if (Alignment >= Size)
+        return true;
+    }
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Apr 23 12:09:34 2018
@@ -577,6 +577,8 @@ AArch64TargetLowering::AArch64TargetLowe
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
+  setTargetDAGCombine(ISD::GlobalAddress);
+
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3677,7 +3679,8 @@ AArch64TargetLowering::LowerReturn(SDVal
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
+                                    N->getOffset(), Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3752,8 +3755,9 @@ SDValue AArch64TargetLowering::LowerGlob
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
 
-  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-         "unexpected offset in global node");
+  if (OpFlags != AArch64II::MO_NO_FLAG)
+    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+           "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4991,10 +4995,8 @@ SDValue AArch64TargetLowering::LowerShif
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  DEBUG(dbgs() << "Skipping offset folding global address: ");
-  DEBUG(GA->dump());
-  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
-        "addresses\n");
+  // Offsets are folded in the DAG combine rather than here so that we can
+  // intelligently choose an offset based on the uses.
   return false;
 }
 
@@ -10617,6 +10619,59 @@ static SDValue performNVCASTCombine(SDNo
   return SDValue();
 }
 
+// If all users of the globaladdr are of the form (globaladdr + constant), find
+// the smallest constant, fold it into the globaladdr's offset and rewrite the
+// globaladdr as (globaladdr + constant) - constant.
+static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
+                                           const AArch64Subtarget *Subtarget,
+                                           const TargetMachine &TM) {
+  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
+  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+                 AArch64II::MO_NO_FLAG)
+    return SDValue();
+
+  uint64_t MinOffset = -1ull;
+  for (SDNode *N : GN->uses()) {
+    if (N->getOpcode() != ISD::ADD)
+      return SDValue();
+    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
+    if (!C)
+      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!C)
+      return SDValue();
+    MinOffset = std::min(MinOffset, C->getZExtValue());
+  }
+  uint64_t Offset = MinOffset + GN->getOffset();
+
+  // Require that the new offset is larger than the existing one. Otherwise, we
+  // can end up oscillating between two possible DAGs, for example,
+  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
+  if (Offset <= uint64_t(GN->getOffset()))
+    return SDValue();
+
+  // Check whether folding this offset is legal. It must not go out of bounds of
+  // the referenced object to avoid violating the code model, and must be
+  // smaller than 2^21 because this is the largest offset expressible in all
+  // object formats.
+  //
+  // This check also prevents us from folding negative offsets, which will end
+  // up being treated in the same way as large positive ones. They could also
+  // cause code model violations, and aren't really common enough to matter.
+  if (Offset >= (1 << 21))
+    return SDValue();
+
+  const GlobalValue *GV = GN->getGlobal();
+  Type *T = GV->getValueType();
+  if (!T->isSized() ||
+      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+    return SDValue();
+
+  SDLoc DL(GN);
+  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
+  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
+                     DAG.getConstant(MinOffset, DL, MVT::i64));
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10704,6 +10759,8 @@ SDValue AArch64TargetLowering::PerformDA
     default:
       break;
     }
+  case ISD::GlobalAddress:
+    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   }
   return SDValue();
 }

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-addrmode.ll Mon Apr 23 12:09:34 2018
@@ -5,32 +5,31 @@
 
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ldr xzr, [x0, #8]
 ; CHECK: ret
-define void @t1() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+define void @t1(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
 ; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+define void @t2(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ldr xzr, [x0, #32760]
 ; CHECK: ret
-define void @t3() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+define void @t3(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -38,10 +37,10 @@ define void @t3() {
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
+; CHECK: ldr xzr, [x0, x[[NUM]]]
 ; CHECK: ret
-define void @t4() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+define void @t4(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -58,12 +57,12 @@ define void @t5(i64 %a) {
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a) {
-  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+define void @t6(i64 %a, i64* %object) {
+  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vector-ldst.ll Mon Apr 23 12:09:34 2018
@@ -264,149 +264,196 @@ entry:
 
 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
 ; registers for unscaled vector accesses
-@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
 
-define <1 x i64> @fct0() nounwind readonly ssp {
+define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
   ret <1 x i64> %0
 }
 
-define <2 x i32> @fct1() nounwind readonly ssp {
+define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
   ret <2 x i32> %0
 }
 
-define <4 x i16> @fct2() nounwind readonly ssp {
+define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
   ret <4 x i16> %0
 }
 
-define <8 x i8> @fct3() nounwind readonly ssp {
+define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
   ret <8 x i8> %0
 }
 
-define <2 x i64> @fct4() nounwind readonly ssp {
+define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
   ret <2 x i64> %0
 }
 
-define <4 x i32> @fct5() nounwind readonly ssp {
+define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
   ret <4 x i32> %0
 }
 
-define <8 x i16> @fct6() nounwind readonly ssp {
+define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
   ret <8 x i16> %0
 }
 
-define <16 x i8> @fct7() nounwind readonly ssp {
+define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
   ret <16 x i8> %0
 }
 
-define void @fct8() nounwind ssp {
+define void @fct8(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
-  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <1 x i64>*
+  store <1 x i64> %0, <1 x i64>* %q2, align 8
   ret void
 }
 
-define void @fct9() nounwind ssp {
+define void @fct9(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
-  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i32>*
+  store <2 x i32> %0, <2 x i32>* %q2, align 8
   ret void
 }
 
-define void @fct10() nounwind ssp {
+define void @fct10(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
-  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i16>*
+  store <4 x i16> %0, <4 x i16>* %q2, align 8
   ret void
 }
 
-define void @fct11() nounwind ssp {
+define void @fct11(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
-  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i8>*
+  store <8 x i8> %0, <8 x i8>* %q2, align 8
   ret void
 }
 
-define void @fct12() nounwind ssp {
+define void @fct12(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
-  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i64>*
+  store <2 x i64> %0, <2 x i64>* %q2, align 16
   ret void
 }
 
-define void @fct13() nounwind ssp {
+define void @fct13(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
-  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i32>*
+  store <4 x i32> %0, <4 x i32>* %q2, align 16
   ret void
 }
 
-define void @fct14() nounwind ssp {
+define void @fct14(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
-  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i16>*
+  store <8 x i16> %0, <8 x i16>* %q2, align 16
   ret void
 }
 
-define void @fct15() nounwind ssp {
+define void @fct15(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
-  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %q2, align 16
   ret void
 }
 

Added: llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll?rev=330630&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fold-global-offsets.ll Mon Apr 23 12:09:34 2018
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+@x1 = external hidden global [2 x i64]
+@x2 = external hidden global [16777216 x i64]
+@x3 = external hidden global { [9 x i8*], [8 x i8*] }
+
+define i64 @f1() {
+  ; CHECK: f1:
+  ; CHECK: adrp x8, x1+16
+  ; CHECK: ldr x0, [x8, :lo12:x1+16]
+  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
+  ret i64 %l
+}
+
+define i64 @f2() {
+  ; CHECK: f2:
+  ; CHECK: adrp x8, x1
+  ; CHECK: add x8, x8, :lo12:x1
+  ; CHECK: ldr x0, [x8, #24]
+  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
+  ret i64 %l
+}
+
+define i64 @f3() {
+  ; CHECK: f3:
+  ; CHECK: adrp x8, x1+1
+  ; CHECK: add x8, x8, :lo12:x1+1
+  ; CHECK: ldr x0, [x8]
+  %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
+  ret i64 %l
+}
+
+define [2 x i64] @f4() {
+  ; CHECK: f4:
+  ; CHECK: adrp x8, x2+8
+  ; CHECK: add x8, x8, :lo12:x2+8
+  ; CHECK: ldp x0, x1, [x8]
+  %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
+  ret [2 x i64] %l
+}
+
+define i64 @f5() {
+  ; CHECK: f5:
+  ; CHECK: adrp x8, x2+2097144
+  ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
+  ; CHECK: ret
+  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
+  ret i64 %l
+}
+
+define i64 @f6() {
+  ; CHECK: f6:
+  ; CHECK: adrp x8, x2
+  ; CHECK: add x8, x8, :lo12:x2
+  ; CHECK: orr w9, wzr, #0x200000
+  ; CHECK: ldr x0, [x8, x9]
+  ; CHECK: ret
+  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
+  ret i64 %l
+}
+
+define i32 @f7() {
+entry:
+  ; CHECK: f7
+  ; CHECK: adrp x8, x3+108
+  ; CHECK: ldr w0, [x8, :lo12:x3+108]
+  %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
+  ret i32 %l
+}

Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-3.ll Mon Apr 23 12:09:34 2018
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a
 ;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp	x9, __MergedGlobals_y@PAGE
-;CHECK-APPLE-IOS: add	x9, x9, __MergedGlobals_y@PAGEOFF
+;CHECK-APPLE-IOS: adrp	x9, __MergedGlobals_y@PAGE+12
+;CHECK-APPLE-IOS: str	w1, [x9, __MergedGlobals_y@PAGEOFF+12]
   %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
   %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
   store i32 %a1, i32* %x3, align 4

Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll Mon Apr 23 12:09:34 2018
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwi
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
+; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
   store i32 %a2, i32* @n3, align 4
@@ -57,10 +57,9 @@ define void @f3(i32 %a1, i32 %a2) minsiz
 
 ; CHECK-LABEL: f4:
 define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
 ; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
 ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4

Modified: llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll?rev=330630&r1=330629&r2=330630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/global-merge-ignore-single-use.ll Mon Apr 23 12:09:34 2018
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #12]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
+; CHECK-NEXT: stp w0, w1, [x8]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m2, align 4
   store i32 %a2, i32* @n2, align 4



