[llvm] r363544 - [DAGCombiner] [CodeGenPrepare] More comprehensive GEP splitting

Luis Marques via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 03:54:12 PDT 2019


Author: luismarques
Date: Mon Jun 17 03:54:12 2019
New Revision: 363544

URL: http://llvm.org/viewvc/llvm-project?rev=363544&view=rev
Log:
[DAGCombiner] [CodeGenPrepare] More comprehensive GEP splitting

Some GEPs were not being split, presumably because that split would just be 
undone by the DAGCombiner. Skipping those splits can block important 
optimizations, such as (partially) folding the element indices / member 
offsets into load/store instruction immediates (see the sketch after the 
list below). This patch:

- Makes the splits also occur in the cases where the base address and the GEP 
  are in the same BB.
- Ensures that the DAGCombiner doesn't reassociate them back again.
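
To illustrate with a hypothetical sketch (not part of the patch): RISC-V
load/store immediates are signed 12-bit fields, so a shared byte offset such
as 80000 (element 20000 of an i32 array, as in the new test) cannot be folded
directly, while the small residual offsets left by the split can be:

  ; Sketch only: before the split, each store needs the full 80000-byte
  ; offset materialized; after it, the base computation is shared and the
  ; +0/+4 residues fold into the sw immediates (0(a0) and 4(a0)).
  define void @sketch(i8* %p) {
    %base  = getelementptr i8, i8* %p, i64 80000 ; shared base (lui+addi+add)
    %addr0 = getelementptr i8, i8* %base, i64 0
    %addr1 = getelementptr i8, i8* %base, i64 4
    %p0 = bitcast i8* %addr0 to i32*
    %p1 = bitcast i8* %addr1 to i32*
    store i32 2, i32* %p0                        ; sw a4, 0(a0)
    store i32 1, i32* %p1                        ; sw a3, 4(a0)
    ret void
  }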

Differential Revision: https://reviews.llvm.org/D60294


Added:
    llvm/trunk/test/CodeGen/RISCV/split-offsets.ll
Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
    llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
    llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
    llvm/trunk/test/CodeGen/ARM/vector-spilling.ll
    llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll
    llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Jun 17 03:54:12 2019
@@ -4202,15 +4202,11 @@ bool AddressingModeMatcher::matchOperati
         if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
             (BaseI && !isa<CastInst>(BaseI) &&
              !isa<GetElementPtrInst>(BaseI))) {
-          // If the base is an instruction, make sure the GEP is not in the same
-          // basic block as the base. If the base is an argument or global
-          // value, make sure the GEP is not in the entry block.  Otherwise,
-          // instruction selection can undo the split.  Also make sure the
-          // parent block allows inserting non-PHI instructions before the
-          // terminator.
+          // Make sure the parent block allows inserting non-PHI instructions
+          // before the terminator.
           BasicBlock *Parent =
               BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-          if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+          if (!Parent->getTerminator()->isEHPad())
             LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
         }
       }
@@ -4740,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(
         InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
 
     GetElementPtrInst *GEP = LargeOffsetGEP.first;
-    if (GEP && GEP->getParent() != MemoryInst->getParent() &&
-        !NewGEPBases.count(GEP)) {
+    if (GEP && !NewGEPBases.count(GEP)) {
       // If splitting the underlying data structure can reduce the offset of a
       // GEP, collect the GEP.  Skip the GEPs that are the new bases of
       // previously split data structures.

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Jun 17 03:54:12 2019
@@ -462,6 +462,9 @@ namespace {
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
+    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                    const SDLoc &DL, SDValue N0,
+                                                    SDValue N1);
     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                       SDValue N1);
     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -1039,6 +1042,62 @@ static bool isAnyConstantBuildVector(SDV
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                             const SDLoc &DL,
+                                                             SDValue N0,
+                                                             SDValue N1) {
+  // Currently this only tries to ensure we don't undo the GEP splits done by
+  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+  // we check if the following transformation would be problematic:
+  // (load/store (add, (add, x, offset1), offset2)) ->
+  // (load/store (add, x, offset1+offset2)).
+
+  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+    return false;
+
+  if (N0.hasOneUse())
+    return false;
+
+  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  auto *C2 = dyn_cast<ConstantSDNode>(N1);
+  if (!C1 || !C2)
+    return false;
+
+  const APInt &C1APIntVal = C1->getAPIntValue();
+  const APInt &C2APIntVal = C2->getAPIntValue();
+  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+    return false;
+
+  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+  if (CombinedValueIntVal.getBitWidth() > 64)
+    return false;
+  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+  for (SDNode *Node : N0->uses()) {
+    auto LoadStore = dyn_cast<MemSDNode>(Node);
+    if (LoadStore) {
+      // Is x[offset2] already not a legal addressing mode? If so then
+      // reassociating the constants breaks nothing (we test offset2 because
+      // that's the one we hope to fold into the load or store).
+      TargetLoweringBase::AddrMode AM;
+      AM.HasBaseReg = true;
+      AM.BaseOffs = C2APIntVal.getSExtValue();
+      EVT VT = LoadStore->getMemoryVT();
+      unsigned AS = LoadStore->getAddressSpace();
+      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        continue;
+
+      // Would x[offset1+offset2] still be a legal addressing mode?
+      AM.BaseOffs = CombinedValue;
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        return true;
+    }
+  }
+
+  return false;
+}
+
 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
@@ -2262,9 +2321,10 @@ SDValue DAGCombiner::visitADDLike(SDNode
     return NewSel;
 
   // reassociate add
-  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
-    return RADD;
-
+  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+      return RADD;
+  }
   // fold ((0-A) + B) -> B-A
   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
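
To make the guarded pattern concrete, here is a hypothetical IR-level
analogue of the DAG shape that reassociationCanBreakAddressingModePattern
describes (a sketch, assuming a 12-bit offset field as on RISC-V): folding
the constants would replace the foldable +4 with a combined +80004 that no
longer fits the immediate, so the large constant would have to be
rematerialized for each access instead of being computed once.

  ; (load/store (add (add x, offset1), offset2)), written as IR for clarity.
  define void @sketch(i64 %x) {
    %base = add i64 %x, 80000           ; offset1: shared base computation
    %a1   = add i64 %base, 4            ; offset2: fits a 12-bit immediate
    %p0   = inttoptr i64 %base to i32*
    %p1   = inttoptr i64 %a1 to i32*
    store i32 1, i32* %p0               ; other use keeps %base alive
    store i32 2, i32* %p1               ; reassociating to (add %x, 80004)
    ret void                            ; would break this addressing mode
  }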

Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h Mon Jun 17 03:54:12 2019
@@ -157,6 +157,7 @@ private:
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
 
+  bool shouldConsiderGEPOffsetSplit() const override { return true; }
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Mon Jun 17 03:54:12 2019
@@ -1,5 +1,5 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI -check-prefix=CI-NOHSA %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
@@ -172,9 +172,10 @@ entry:
 ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
 ; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
 ; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; CI-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
+; CI-NOHSA-NOT: v_add
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
+; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
 ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
 
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
@@ -202,14 +203,19 @@ entry:
 
 ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
 
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
-; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
-; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
+; SI: s_mov_b32 {{s[0-9]+}}, 0x13480
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
+; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
+; CI-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
+; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
 
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}

Modified: llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll Mon Jun 17 03:54:12 2019
@@ -76,24 +76,25 @@ define void @aesea(<16 x i8>* %a0, <16 x
 ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
 
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
 
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
 
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
 
 ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
 
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
 
 ; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
 }
@@ -165,20 +166,27 @@ define void @aesda(<16 x i8>* %a0, <16 x
 ; CHECK-LABEL: aesda:
 ; CHECK: aesd.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
+
 ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
+
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
 ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
+
 ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
+
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]]
+
 ; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
 ; CHECK: aesd.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QH]]
@@ -207,6 +215,7 @@ entry:
 ; CHECK-LABEL: aes_load_store:
 ; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
+
 ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
 ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
 }

Modified: llvm/trunk/test/CodeGen/ARM/vector-spilling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vector-spilling.ll?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vector-spilling.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vector-spilling.ll Mon Jun 17 03:54:12 2019
@@ -22,8 +22,8 @@ entry:
   %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3
   %7 = load <8 x i64>, <8 x i64>* %6, align 8
 
-  %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-  %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 12, i32 4, i32 15, i32 14, i32 8, i32 13, i32 2, i32 9>
+  %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 1, i32 0, i32 3, i32 10, i32 5, i32 11, i32 7, i32 6>
 
   tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
   ret void

Added: llvm/trunk/test/CodeGen/RISCV/split-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/split-offsets.ll?rev=363544&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/split-offsets.ll (added)
+++ llvm/trunk/test/CodeGen/RISCV/split-offsets.ll Mon Jun 17 03:54:12 2019
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+; Check that memory accesses to array elements with large offsets have those
+; offsets split into a base offset, plus a smaller offset that is folded into
+; the memory operation. We should also only compute that base offset once,
+; since it can be shared for all memory operations in this test.
+define void @test1([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 20
+; RV32I-NEXT:    addi a2, a2, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a2
+; RV32I-NEXT:    addi a3, zero, 1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, zero, 2
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    add a0, a1, a2
+; RV32I-NEXT:    sw a4, 4(a0)
+; RV32I-NEXT:    sw a3, 0(a0)
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test1:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lui a2, 20
+; RV64I-NEXT:    addiw a2, a2, -1920
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    add a0, a0, a2
+; RV64I-NEXT:    addi a3, zero, 1
+; RV64I-NEXT:    sw a3, 4(a0)
+; RV64I-NEXT:    addi a4, zero, 2
+; RV64I-NEXT:    sw a4, 0(a0)
+; RV64I-NEXT:    add a0, a1, a2
+; RV64I-NEXT:    sw a4, 4(a0)
+; RV64I-NEXT:    sw a3, 0(a0)
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  store i32 2, i32* %gep0
+  store i32 1, i32* %gep1
+  store i32 1, i32* %gep2
+  store i32 2, i32* %gep3
+  ret void
+}
+
+; Ditto. Check it when the GEPs are not in the entry block.
+define void @test2([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; RV32I-LABEL: test2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a3, 20
+; RV32I-NEXT:    addi a3, a3, -1920
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    add a0, a0, a3
+; RV32I-NEXT:    add a1, a1, a3
+; RV32I-NEXT:    mv a3, zero
+; RV32I-NEXT:    bge a3, a2, .LBB1_2
+; RV32I-NEXT:  .LBB1_1: # %while_body
+; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    addi a4, a3, 1
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a1)
+; RV32I-NEXT:    sw a4, 0(a1)
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    blt a3, a2, .LBB1_1
+; RV32I-NEXT:  .LBB1_2: # %while_end
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    lui a3, 20
+; RV64I-NEXT:    addiw a3, a3, -1920
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    add a0, a0, a3
+; RV64I-NEXT:    add a1, a1, a3
+; RV64I-NEXT:    sext.w a2, a2
+; RV64I-NEXT:    mv a3, zero
+; RV64I-NEXT:    sext.w a4, a3
+; RV64I-NEXT:    bge a4, a2, .LBB1_2
+; RV64I-NEXT:  .LBB1_1: # %while_body
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    sw a3, 4(a0)
+; RV64I-NEXT:    addi a4, a3, 1
+; RV64I-NEXT:    sw a4, 0(a0)
+; RV64I-NEXT:    sw a3, 4(a1)
+; RV64I-NEXT:    sw a4, 0(a1)
+; RV64I-NEXT:    mv a3, a4
+; RV64I-NEXT:    sext.w a4, a3
+; RV64I-NEXT:    blt a4, a2, .LBB1_1
+; RV64I-NEXT:  .LBB1_2: # %while_end
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  br label %while_cond
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+while_body:
+  %i = add i32 %phi, 1
+  %j = add i32 %phi, 2
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  store i32 %i, i32* %gep2
+  store i32 %phi, i32* %gep3
+  br label %while_cond
+while_end:
+  ret void
+}
+

Modified: llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/int-add-08.ll Mon Jun 17 03:54:12 2019
@@ -50,9 +50,8 @@ define void @f3(i128 *%aptr, i64 %base)
 ; Test the next doubleword up, which requires separate address logic for ALG.
 define void @f4(i128 *%aptr, i64 %base) {
 ; CHECK-LABEL: f4:
-; CHECK: lgr [[BASE:%r[1-5]]], %r3
-; CHECK: agfi [[BASE]], 524288
-; CHECK: alg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: lay [[BASE:%r[1-5]]], 524280(%r3)
+; CHECK: alg {{%r[0-5]}}, 8([[BASE]])
 ; CHECK: alcg {{%r[0-5]}}, 524280(%r3)
 ; CHECK: br %r14
   %addr = add i64 %base, 524280
@@ -65,11 +64,10 @@ define void @f4(i128 *%aptr, i64 %base)
 }
 
 ; Test the next doubleword after that, which requires separate logic for
-; both instructions.  It would be better to create an anchor at 524288
-; that both instructions can use, but that isn't implemented yet.
+; both instructions.
 define void @f5(i128 *%aptr, i64 %base) {
 ; CHECK-LABEL: f5:
-; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: alg {{%r[0-5]}}, 8({{%r[1-5]}})
 ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
   %addr = add i64 %base, 524288

Modified: llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll?rev=363544&r1=363543&r2=363544&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/int-sub-05.ll Mon Jun 17 03:54:12 2019
@@ -55,9 +55,8 @@ define void @f3(i64 %base) {
 ; Test the next doubleword up, which requires separate address logic for SLG.
 define void @f4(i64 %base) {
 ; CHECK-LABEL: f4:
-; CHECK: lgr [[BASE:%r[1-5]]], %r2
-; CHECK: agfi [[BASE]], 524288
-; CHECK: slg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: lay [[BASE:%r[1-5]]], 524280(%r2)
+; CHECK: slg {{%r[0-5]}}, 8([[BASE]])
 ; CHECK: slbg {{%r[0-5]}}, 524280(%r2)
 ; CHECK: br %r14
   %addr = add i64 %base, 524280
@@ -71,11 +70,10 @@ define void @f4(i64 %base) {
 }
 
 ; Test the next doubleword after that, which requires separate logic for
-; both instructions.  It would be better to create an anchor at 524288
-; that both instructions can use, but that isn't implemented yet.
+; both instructions.
 define void @f5(i64 %base) {
 ; CHECK-LABEL: f5:
-; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: slg {{%r[0-5]}}, 8({{%r[1-5]}})
 ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
   %addr = add i64 %base, 524288

