[llvm] cb3f415 - [PowerPC] Fix up memory ordering after combining BV to a load
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 16 08:00:42 PST 2022
Thank you!
On Fri, Dec 16, 2022 at 5:59 PM Nemanja Ivanovic via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Nemanja Ivanovic
> Date: 2022-12-16T08:57:36-06:00
> New Revision: cb3f415cd2019df7d14683842198bc4b7a492bc5
>
> URL: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5
> DIFF: https://github.com/llvm/llvm-project/commit/cb3f415cd2019df7d14683842198bc4b7a492bc5.diff
>
> LOG: [PowerPC] Fix up memory ordering after combining BV to a load
>
> The combiner for BUILD_VECTOR that merges consecutive
> loads into a wide load had two issues:
>
> - It didn't check that the input loads all have the
> same input chain
> - It didn't update nodes that are chained to the original
> loads to be chained to the new load
>
> This caused issues with bootstrap when
> 3c4d2a03968ccf5889bacffe02d6fa2443b0260f was committed.
> This patch fixes the issue so it can unblock this commit.
>
> Differential revision: https://reviews.llvm.org/D140046
>
> Added:
> llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
>
> Modified:
> llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> llvm/lib/Target/PowerPC/PPCISelLowering.cpp
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> index e9f61e7828b61..a39d0a50bd876 100644
> --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -11497,7 +11497,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
> return false;
> if (LD->getChain() != Base->getChain())
> return false;
> - EVT VT = LD->getValueType(0);
> + EVT VT = LD->getMemoryVT();
> if (VT.getSizeInBits() / 8 != Bytes)
> return false;
>
>
> diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> index a74a43c72df49..b13d0da227f50 100644
> --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
> @@ -14245,17 +14245,23 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
> unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
> SDValue FirstInput = N->getOperand(0);
> bool IsRoundOfExtLoad = false;
> + LoadSDNode *FirstLoad = nullptr;
>
> if (FirstInput.getOpcode() == ISD::FP_ROUND &&
> FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
> - LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
> - IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
> + FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
> + IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
> }
> // Not a build vector of (possibly fp_rounded) loads.
> if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
> N->getNumOperands() == 1)
> return SDValue();
>
> + if (!IsRoundOfExtLoad)
> + FirstLoad = cast<LoadSDNode>(FirstInput);
> +
> + SmallVector<LoadSDNode *, 4> InputLoads;
> + InputLoads.push_back(FirstLoad);
> for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
> // If any inputs are fp_round(extload), they all must be.
> if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
> @@ -14268,53 +14274,55 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
>
> SDValue PreviousInput =
> IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
> - LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
> - LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
> + LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
> + LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
>
> // If any inputs are fp_round(extload), they all must be.
> if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
> return SDValue();
>
> - if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
> + // We only care about regular loads. The PPC-specific load intrinsics
> + // will not lead to a merge opportunity.
> + if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
> InputsAreConsecutiveLoads = false;
> - if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
> + if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
> InputsAreReverseConsecutive = false;
>
> // Exit early if the loads are neither consecutive nor reverse consecutive.
> if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
> return SDValue();
> + InputLoads.push_back(LD2);
> }
>
> assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
> "The loads cannot be both consecutive and reverse consecutive.");
>
> - SDValue FirstLoadOp =
> - IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
> - SDValue LastLoadOp =
> - IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
> - N->getOperand(N->getNumOperands()-1);
> -
> - LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
> - LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
> + SDValue WideLoad;
> + SDValue ReturnSDVal;
> if (InputsAreConsecutiveLoads) {
> - assert(LD1 && "Input needs to be a LoadSDNode.");
> - return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
> - LD1->getBasePtr(), LD1->getPointerInfo(),
> - LD1->getAlign());
> - }
> - if (InputsAreReverseConsecutive) {
> - assert(LDL && "Input needs to be a LoadSDNode.");
> - SDValue Load =
> - DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
> - LDL->getPointerInfo(), LDL->getAlign());
> + assert(FirstLoad && "Input needs to be a LoadSDNode.");
> + WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
> + FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
> + FirstLoad->getAlign());
> + ReturnSDVal = WideLoad;
> + } else if (InputsAreReverseConsecutive) {
> + LoadSDNode *LastLoad = InputLoads.back();
> + assert(LastLoad && "Input needs to be a LoadSDNode.");
> + WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
> + LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
> + LastLoad->getAlign());
> SmallVector<int, 16> Ops;
> for (int i = N->getNumOperands() - 1; i >= 0; i--)
> Ops.push_back(i);
>
> - return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
> - DAG.getUNDEF(N->getValueType(0)), Ops);
> - }
> - return SDValue();
> + ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
> + DAG.getUNDEF(N->getValueType(0)), Ops);
> + } else
> + return SDValue();
> +
> + for (auto *LD : InputLoads)
> + DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
> + return ReturnSDVal;
> }
>
> // This function adds the required vector_shuffle needed to get
>
> diff --git a/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
> new file mode 100644
> index 0000000000000..b45e83d71c4d8
> --- /dev/null
> +++ b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
> @@ -0,0 +1,59 @@
> +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | \
> +; RUN: FileCheck %s
> +
> +%0 = type <{ %1, ptr, i32, [4 x i8] }>
> +%1 = type { %2 }
> +%2 = type { %3 }
> +%3 = type { ptr, ptr, ptr }
> +
> +$testfunc = comdat any
> +
> +declare void @_ZdlPv() local_unnamed_addr #0
> +
> +define void @testfunc(i64 %arg) local_unnamed_addr #0 comdat {
> +; CHECK-LABEL: testfunc:
> +; CHECK: # %bb.0: # %bb
> +; CHECK-NEXT: mflr 0
> +; CHECK-NEXT: stdu 1, -80(1)
> +; CHECK-NEXT: std 0, 96(1)
> +; CHECK-NEXT: .cfi_def_cfa_offset 80
> +; CHECK-NEXT: .cfi_offset lr, 16
> +; CHECK-NEXT: .cfi_offset v30, -32
> +; CHECK-NEXT: .cfi_offset v31, -16
> +; CHECK-NEXT: li 4, 48
> +; CHECK-NEXT: addi 3, 3, 24
> +; CHECK-NEXT: stvx 30, 1, 4 # 16-byte Folded Spill
> +; CHECK-NEXT: li 4, 64
> +; CHECK-NEXT: stvx 31, 1, 4 # 16-byte Folded Spill
> +; CHECK-NEXT: lxvd2x 63, 0, 3
> +; CHECK-NEXT: xxswapd 62, 63
> +; CHECK-NEXT: bc 12, 20, .LBB0_2
> +; CHECK-NEXT: # %bb.1: # %bb37
> +; CHECK-NEXT: bl _ZdlPv
> +; CHECK-NEXT: nop
> +; CHECK-NEXT: .LBB0_2: # %bb38
> +; CHECK-NEXT: stxsiwx 62, 0, 3
> +; CHECK-NEXT: stxsdx 63, 0, 3
> +; CHECK-NEXT: li 3, 64
> +; CHECK-NEXT: lvx 31, 1, 3 # 16-byte Folded Reload
> +; CHECK-NEXT: li 3, 48
> +; CHECK-NEXT: lvx 30, 1, 3 # 16-byte Folded Reload
> +; CHECK-NEXT: addi 1, 1, 80
> +; CHECK-NEXT: ld 0, 16(1)
> +; CHECK-NEXT: mtlr 0
> +; CHECK-NEXT: blr
> +bb:
> + %i = inttoptr i64 %arg to ptr
> + %i6 = getelementptr inbounds %0, ptr %i, i64 0, i32 1
> + %i7 = load <12 x i8>, ptr %i6, align 8
> + br i1 poison, label %bb38, label %bb37
> +
> +bb37: ; preds = %bb
> + tail call void @_ZdlPv() #1
> + br label %bb38
> +
> +bb38: ; preds = %bb37, %bb
> + store <12 x i8> %i7, ptr poison, align 8
> + ret void
> +}
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list