[llvm-branch-commits] [llvm] 321d929 - [DAGCombine] Fix splitting indexed loads in ForwardStoreValueToDirectLoad()

Tue Apr 14 20:09:18 PDT 2020

Author: Nemanja Ivanovic
Date: 2020-04-14T20:05:13-07:00
New Revision: 321d929774c6fa0767e4ae5eb0881ad15e7a4664

URL: https://github.com/llvm/llvm-project/commit/321d929774c6fa0767e4ae5eb0881ad15e7a4664
DIFF: https://github.com/llvm/llvm-project/commit/321d929774c6fa0767e4ae5eb0881ad15e7a4664.diff

LOG: [DAGCombine] Fix splitting indexed loads in ForwardStoreValueToDirectLoad()

In DAGCombiner::visitLOAD() we perform some checks before breaking up an indexed
load. However, we don't do the same checking in ForwardStoreValueToDirectLoad()
which can lead to failures later during combining
(see: https://bugs.llvm.org/show_bug.cgi?id=45301).

This patch just adds the same checks to this function as well.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=45301

Differential revision: https://reviews.llvm.org/D76778

(cherry picked from commit 482141134729237072cb94248381dab96ce34374)

Added: 
    llvm/test/CodeGen/PowerPC/pr45301.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8ff04797c8d8..2476fd26f250 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -886,6 +886,13 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
+// True if splitting is enabled and LD's index isn't an opaque target constant.
+static bool canSplitIdx(LoadSDNode *LD) {
+  return MaySplitLoadIndex &&
+         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
+          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
+}
+
 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                              const SDLoc &DL,
                                                              SDValue N0,
@@ -14222,11 +14229,11 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
 
   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
     if (LD->isIndexed()) {
-      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
-                    LD->getAddressingMode() == ISD::POST_DEC);
-      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
-      SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
-                             LD->getOperand(1), LD->getOperand(2));
+      // Cannot handle opaque target constants and we must respect the user's
+      // request not to split indexes from loads.
+      if (!canSplitIdx(LD))
+        return SDValue();
+      SDValue Idx = SplitIndexingFromLoad(LD);
       SDValue Ops[] = {Val, Idx, Chain};
       return CombineTo(LD, Ops, 3);
     }
@@ -14322,14 +14329,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
       // the indexing into an add/sub directly (that TargetConstant may not be
       // valid for a different type of node, and we cannot convert an opaque
       // target constant into a regular constant).
-      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
-                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+      bool CanSplitIdx = canSplitIdx(LD);
 
-      if (!N->hasAnyUseOfValue(0) &&
-          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
+      if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
         SDValue Index;
-        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+        if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
           Index = SplitIndexingFromLoad(LD);
           // Try to fold the base pointer arithmetic into subsequent loads and
           // stores.

diff --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll
new file mode 100644
index 000000000000..ee0c6c341cc0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr45301.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64-- -verify-machineinstrs \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s
+%struct.e.0.1.2.3.12.29 = type { [10 x i32] }
+
+define dso_local void @g(%struct.e.0.1.2.3.12.29* %agg.result) local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -112(r1)
+; CHECK-NEXT:    bl i
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addis r4, r2, g@toc@ha
+; CHECK-NEXT:    addi r4, r4, g@toc@l
+; CHECK-NEXT:    ld r5, 0(r4)
+; CHECK-NEXT:    std r5, 0(r3)
+; CHECK-NEXT:    ld r5, 16(r4)
+; CHECK-NEXT:    std r5, 16(r3)
+; CHECK-NEXT:    ld r6, 8(r4)
+; CHECK-NEXT:    std r6, 8(r3)
+; CHECK-NEXT:    ld r6, 24(r4)
+; CHECK-NEXT:    std r6, 24(r3)
+; CHECK-NEXT:    lwz r6, 0(r3)
+; CHECK-NEXT:    ld r4, 32(r4)
+; CHECK-NEXT:    std r4, 32(r3)
+; CHECK-NEXT:    li r4, 20
+; CHECK-NEXT:    stwbrx r6, 0, r3
+; CHECK-NEXT:    stwbrx r5, r3, r4
+; CHECK-NEXT:    addi r1, r1, 112
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @i to i32 ()*)()
+  %conv = sext i32 %call to i64
+  %0 = inttoptr i64 %conv to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(40) %0, i8* nonnull align 4 dereferenceable(40) bitcast (void (%struct.e.0.1.2.3.12.29*)* @g to i8*), i64 40, i1 false)
+  %1 = inttoptr i64 %conv to i32*
+  %2 = load i32, i32* %1, align 4
+  %rev.i = tail call i32 @llvm.bswap.i32(i32 %2)
+  store i32 %rev.i, i32* %1, align 4
+  %incdec.ptr.i.4 = getelementptr inbounds i32, i32* %1, i64 5
+  %3 = load i32, i32* %incdec.ptr.i.4, align 4
+  %rev.i.5 = tail call i32 @llvm.bswap.i32(i32 %3)
+  store i32 %rev.i.5, i32* %incdec.ptr.i.4, align 4
+  ret void
+}
+
+declare i32 @i(...) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i32 @llvm.bswap.i32(i32)
+
+attributes #0 = { nounwind }