[llvm] cb3f415 - [PowerPC] Fix up memory ordering after combining BV to a load

Fri Dec 16 06:59:29 PST 2022

Author: Nemanja Ivanovic
Date: 2022-12-16T08:57:36-06:00
New Revision: cb3f415cd2019df7d14683842198bc4b7a492bc5

URL: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5
DIFF: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5.diff

LOG: [PowerPC] Fix up memory ordering after combining BV to a load

The combiner for BUILD_VECTOR that merges consecutive
loads into a wide load had two issues:

- It didn't check that the input loads all have the
  same input chain
- It didn't update nodes that are chained to the original
  loads to be chained to the new load

This caused issues with bootstrap when
3c4d2a03968ccf5889bacffe02d6fa2443b0260f was committed.
This patch fixes the issue so it can unblock this commit.

Differential revision: https://reviews.llvm.org/D140046

Added: 
    llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e9f61e7828b61..a39d0a50bd876 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -11497,7 +11497,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
     return false;
   if (LD->getChain() != Base->getChain())
     return false;
-  EVT VT = LD->getValueType(0);
+  EVT VT = LD->getMemoryVT();
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a74a43c72df49..b13d0da227f50 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14245,17 +14245,23 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
   SDValue FirstInput = N->getOperand(0);
   bool IsRoundOfExtLoad = false;
+  LoadSDNode *FirstLoad = nullptr;
 
   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
-    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
-    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
+    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
+    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
   }
   // Not a build vector of (possibly fp_rounded) loads.
   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
       N->getNumOperands() == 1)
     return SDValue();
 
+  if (!IsRoundOfExtLoad)
+    FirstLoad = cast<LoadSDNode>(FirstInput);
+
+  SmallVector<LoadSDNode *, 4> InputLoads;
+  InputLoads.push_back(FirstLoad);
   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
     // If any inputs are fp_round(extload), they all must be.
     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
@@ -14268,53 +14274,55 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
 
     SDValue PreviousInput =
       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
-    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
-    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
+    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
+    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
 
     // If any inputs are fp_round(extload), they all must be.
     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
       return SDValue();
 
-    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
+    // We only care about regular loads. The PPC-specific load intrinsics
+    // will not lead to a merge opportunity.
+    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
       InputsAreConsecutiveLoads = false;
-    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
+    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
       InputsAreReverseConsecutive = false;
 
     // Exit early if the loads are neither consecutive nor reverse consecutive.
     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
       return SDValue();
+    InputLoads.push_back(LD2);
   }
 
   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
          "The loads cannot be both consecutive and reverse consecutive.");
 
-  SDValue FirstLoadOp =
-    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
-  SDValue LastLoadOp =
-    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
-                       N->getOperand(N->getNumOperands()-1);
-
-  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
-  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
+  SDValue WideLoad;
+  SDValue ReturnSDVal;
   if (InputsAreConsecutiveLoads) {
-    assert(LD1 && "Input needs to be a LoadSDNode.");
-    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
-                       LD1->getBasePtr(), LD1->getPointerInfo(),
-                       LD1->getAlign());
-  }
-  if (InputsAreReverseConsecutive) {
-    assert(LDL && "Input needs to be a LoadSDNode.");
-    SDValue Load =
-        DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
-                    LDL->getPointerInfo(), LDL->getAlign());
+    assert(FirstLoad && "Input needs to be a LoadSDNode.");
+    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
+                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+                           FirstLoad->getAlign());
+    ReturnSDVal = WideLoad;
+  } else if (InputsAreReverseConsecutive) {
+    LoadSDNode *LastLoad = InputLoads.back();
+    assert(LastLoad && "Input needs to be a LoadSDNode.");
+    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
+                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
+                           LastLoad->getAlign());
     SmallVector<int, 16> Ops;
     for (int i = N->getNumOperands() - 1; i >= 0; i--)
       Ops.push_back(i);
 
-    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
-                                DAG.getUNDEF(N->getValueType(0)), Ops);
-  }
-  return SDValue();
+    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
+                                       DAG.getUNDEF(N->getValueType(0)), Ops);
+  } else
+    return SDValue();
+
+  for (auto *LD : InputLoads)
+    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
+  return ReturnSDVal;
 }
 
 // This function adds the required vector_shuffle needed to get

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
new file mode 100644
index 0000000000000..b45e83d71c4d8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s
+
+%0 = type <{ %1, ptr, i32, [4 x i8] }>
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { ptr, ptr, ptr }
+
+$testfunc = comdat any
+
+declare void @_ZdlPv() local_unnamed_addr #0
+
+define void @testfunc(i64 %arg) local_unnamed_addr #0 comdat {
+; CHECK-LABEL: testfunc:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -80(1)
+; CHECK-NEXT:    std 0, 96(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li 4, 48
+; CHECK-NEXT:    addi 3, 3, 24
+; CHECK-NEXT:    stvx 30, 1, 4 # 16-byte Folded Spill
+; CHECK-NEXT:    li 4, 64
+; CHECK-NEXT:    stvx 31, 1, 4 # 16-byte Folded Spill
+; CHECK-NEXT:    lxvd2x 63, 0, 3
+; CHECK-NEXT:    xxswapd 62, 63
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %bb37
+; CHECK-NEXT:    bl _ZdlPv
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_2: # %bb38
+; CHECK-NEXT:    stxsiwx 62, 0, 3
+; CHECK-NEXT:    stxsdx 63, 0, 3
+; CHECK-NEXT:    li 3, 64
+; CHECK-NEXT:    lvx 31, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT:    li 3, 48
+; CHECK-NEXT:    lvx 30, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 80
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+bb:
+  %i = inttoptr i64 %arg to ptr
+  %i6 = getelementptr inbounds %0, ptr %i, i64 0, i32 1
+  %i7 = load <12 x i8>, ptr %i6, align 8
+  br i1 poison, label %bb38, label %bb37
+
+bb37:                                             ; preds = %bb
+  tail call void @_ZdlPv() #1
+  br label %bb38
+
+bb38:                                             ; preds = %bb37, %bb
+  store <12 x i8> %i7, ptr poison, align 8
+  ret void
+}