[llvm] cb3f415 - [PowerPC] Fix up memory ordering after combining BV to a load

Fri Dec 16 08:00:42 PST 2022

Thank you!

On Fri, Dec 16, 2022 at 5:59 PM Nemanja Ivanovic via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Nemanja Ivanovic
> Date: 2022-12-16T08:57:36-06:00
> New Revision: cb3f415cd2019df7d14683842198bc4b7a492bc5
>
> URL: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5
> DIFF: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5.diff
>
> LOG: [PowerPC] Fix up memory ordering after combining BV to a load
>
> The combiner for BUILD_VECTOR that merges consecutive
> loads into a wide load had two issues:
>
> - It didn't check that the input loads all have the
>   same input chain
> - It didn't update nodes that are chained to the original
>   loads to be chained to the new load
>
> This caused issues with bootstrap when
> 3c4d2a03968ccf5889bacffe02d6fa2443b0260f was committed.
> This patch fixes the issue, unblocking that commit.
>
> Differential revision: https://reviews.llvm.org/D140046
>
> Added:
>     llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
>
> Modified:
>     llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>     llvm/lib/Target/PowerPC/PPCISelLowering.cpp
>
> Removed:
>
>
>
> ################################################################################
> diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> index e9f61e7828b61..a39d0a50bd876 100644
> --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -11497,7 +11497,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
>      return false;
>    if (LD->getChain() != Base->getChain())
>      return false;
> -  EVT VT = LD->getValueType(0);
> +  EVT VT = LD->getMemoryVT();
>    if (VT.getSizeInBits() / 8 != Bytes)
>      return false;
>
>
> diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> index a74a43c72df49..b13d0da227f50 100644
> --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> @@ -14245,17 +14245,23 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
>    unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
>    SDValue FirstInput = N->getOperand(0);
>    bool IsRoundOfExtLoad = false;
> +  LoadSDNode *FirstLoad = nullptr;
>
>    if (FirstInput.getOpcode() == ISD::FP_ROUND &&
>        FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
> -    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
> -    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
> +    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
> +    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
>    }
>    // Not a build vector of (possibly fp_rounded) loads.
>    if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
>        N->getNumOperands() == 1)
>      return SDValue();
>
> +  if (!IsRoundOfExtLoad)
> +    FirstLoad = cast<LoadSDNode>(FirstInput);
> +
> +  SmallVector<LoadSDNode *, 4> InputLoads;
> +  InputLoads.push_back(FirstLoad);
>    for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
>      // If any inputs are fp_round(extload), they all must be.
>      if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
> @@ -14268,53 +14274,55 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
>
>      SDValue PreviousInput =
>        IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
> -    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
> -    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
> +    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
> +    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
>
>      // If any inputs are fp_round(extload), they all must be.
>      if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
>        return SDValue();
>
> -    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
> +    // We only care about regular loads. The PPC-specific load intrinsics
> +    // will not lead to a merge opportunity.
> +    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
>        InputsAreConsecutiveLoads = false;
> -    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
> +    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
>        InputsAreReverseConsecutive = false;
>
>      // Exit early if the loads are neither consecutive nor reverse consecutive.
>      if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
>        return SDValue();
> +    InputLoads.push_back(LD2);
>    }
>
>    assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
>           "The loads cannot be both consecutive and reverse consecutive.");
>
> -  SDValue FirstLoadOp =
> -    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
> -  SDValue LastLoadOp =
> -    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
> -                       N->getOperand(N->getNumOperands()-1);
> -
> -  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
> -  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
> +  SDValue WideLoad;
> +  SDValue ReturnSDVal;
>    if (InputsAreConsecutiveLoads) {
> -    assert(LD1 && "Input needs to be a LoadSDNode.");
> -    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
> -                       LD1->getBasePtr(), LD1->getPointerInfo(),
> -                       LD1->getAlign());
> -  }
> -  if (InputsAreReverseConsecutive) {
> -    assert(LDL && "Input needs to be a LoadSDNode.");
> -    SDValue Load =
> -        DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
> -                    LDL->getPointerInfo(), LDL->getAlign());
> +    assert(FirstLoad && "Input needs to be a LoadSDNode.");
> +    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
> +                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
> +                           FirstLoad->getAlign());
> +    ReturnSDVal = WideLoad;
> +  } else if (InputsAreReverseConsecutive) {
> +    LoadSDNode *LastLoad = InputLoads.back();
> +    assert(LastLoad && "Input needs to be a LoadSDNode.");
> +    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
> +                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
> +                           LastLoad->getAlign());
>      SmallVector<int, 16> Ops;
>      for (int i = N->getNumOperands() - 1; i >= 0; i--)
>        Ops.push_back(i);
>
> -    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
> -                                DAG.getUNDEF(N->getValueType(0)), Ops);
> -  }
> -  return SDValue();
> +    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
> +                                       DAG.getUNDEF(N->getValueType(0)), Ops);
> +  } else
> +    return SDValue();
> +
> +  for (auto *LD : InputLoads)
> +    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
> +  return ReturnSDVal;
>  }
>
>  // This function adds the required vector_shuffle needed to get
>
> diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
> new file mode 100644
> index 0000000000000..b45e83d71c4d8
> --- /dev/null
> +++ b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
> @@ -0,0 +1,59 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | \
> +; RUN:   FileCheck %s
> +
> +%0 = type <{ %1, ptr, i32, [4 x i8] }>
> +%1 = type { %2 }
> +%2 = type { %3 }
> +%3 = type { ptr, ptr, ptr }
> +
> +$testfunc = comdat any
> +
> +declare void @_ZdlPv() local_unnamed_addr #0
> +
> +define void @testfunc(i64 %arg) local_unnamed_addr #0 comdat {
> +; CHECK-LABEL: testfunc:
> +; CHECK:       # %bb.0: # %bb
> +; CHECK-NEXT:    mflr 0
> +; CHECK-NEXT:    stdu 1, -80(1)
> +; CHECK-NEXT:    std 0, 96(1)
> +; CHECK-NEXT:    .cfi_def_cfa_offset 80
> +; CHECK-NEXT:    .cfi_offset lr, 16
> +; CHECK-NEXT:    .cfi_offset v30, -32
> +; CHECK-NEXT:    .cfi_offset v31, -16
> +; CHECK-NEXT:    li 4, 48
> +; CHECK-NEXT:    addi 3, 3, 24
> +; CHECK-NEXT:    stvx 30, 1, 4 # 16-byte Folded Spill
> +; CHECK-NEXT:    li 4, 64
> +; CHECK-NEXT:    stvx 31, 1, 4 # 16-byte Folded Spill
> +; CHECK-NEXT:    lxvd2x 63, 0, 3
> +; CHECK-NEXT:    xxswapd 62, 63
> +; CHECK-NEXT:    bc 12, 20, .LBB0_2
> +; CHECK-NEXT:  # %bb.1: # %bb37
> +; CHECK-NEXT:    bl _ZdlPv
> +; CHECK-NEXT:    nop
> +; CHECK-NEXT:  .LBB0_2: # %bb38
> +; CHECK-NEXT:    stxsiwx 62, 0, 3
> +; CHECK-NEXT:    stxsdx 63, 0, 3
> +; CHECK-NEXT:    li 3, 64
> +; CHECK-NEXT:    lvx 31, 1, 3 # 16-byte Folded Reload
> +; CHECK-NEXT:    li 3, 48
> +; CHECK-NEXT:    lvx 30, 1, 3 # 16-byte Folded Reload
> +; CHECK-NEXT:    addi 1, 1, 80
> +; CHECK-NEXT:    ld 0, 16(1)
> +; CHECK-NEXT:    mtlr 0
> +; CHECK-NEXT:    blr
> +bb:
> +  %i = inttoptr i64 %arg to ptr
> +  %i6 = getelementptr inbounds %0, ptr %i, i64 0, i32 1
> +  %i7 = load <12 x i8>, ptr %i6, align 8
> +  br i1 poison, label %bb38, label %bb37
> +
> +bb37:                                             ; preds = %bb
> +  tail call void @_ZdlPv() #1
> +  br label %bb38
> +
> +bb38:                                             ; preds = %bb37, %bb
> +  store <12 x i8> %i7, ptr poison, align 8
> +  ret void
> +}
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits