[llvm] [SystemZ] Don't lower float/double ATOMIC_[LOAD|STORE] to [LOAD|STORE] (PR #75879)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 9 18:08:44 PST 2024
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/75879
From 76bf0f9cdd0e3c288852d2020a118710680db038 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Mon, 18 Dec 2023 18:49:11 -0600
Subject: [PATCH 1/2] [SystemZ] Don't lower ATOMIC_[LOAD|STORE] to [LOAD|STORE]
(Use PatFrags for loads.) Try to convert to LOAD in select() instead. Was
e20dad7
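
For illustration, the kind of case this changes (mirroring the atomic-load-06.ll and atomic-store-06.ll tests updated below) is a plain float atomic load/store:

  define float @f1(ptr %src) {
    %val = load atomic float, ptr %src seq_cst, align 4
    ret float %val
  }

Previously this was lowered via a GPR (lgf + sllg + ldgr); with this patch it can be selected as a direct fp load (le), and the corresponding atomic float store uses ste instead of lgdr + srlg + st.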
---
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 11 +
.../SelectionDAG/LegalizeIntegerTypes.cpp | 13 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 +
.../SelectionDAG/SelectionDAGDumper.cpp | 12 +
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 66 ++
.../Target/SystemZ/SystemZISelLowering.cpp | 140 ++--
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 8 +-
llvm/lib/Target/SystemZ/SystemZInstrFP.td | 8 +-
llvm/lib/Target/SystemZ/SystemZOperators.td | 4 +
llvm/test/CodeGen/SystemZ/atomic-load-06.ll | 4 +-
llvm/test/CodeGen/SystemZ/atomic-memofolds.ll | 723 ++++++++++++++++++
llvm/test/CodeGen/SystemZ/atomic-store-06.ll | 5 +-
12 files changed, 945 insertions(+), 59 deletions(-)
create mode 100644 llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 65b06d0f457912..97c575752ae1ea 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -558,6 +558,7 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
+ friend class AtomicSDNode;
friend class VPLoadSDNode;
friend class VPStridedLoadSDNode;
friend class MaskedLoadSDNode;
@@ -1473,6 +1474,16 @@ class AtomicSDNode : public MemSDNode {
MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
}
+ void setExtensionType(ISD::LoadExtType ETy) {
+ assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads.");
+ LoadSDNodeBits.ExtTy = ETy;
+ }
+
+ ISD::LoadExtType getExtensionType() const {
+ assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads.");
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+ }
+
const SDValue &getBasePtr() const {
return getOpcode() == ISD::ATOMIC_STORE ? getOperand(2) : getOperand(1);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 814f746f5a4d9d..f444f3010b0d8b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -340,6 +340,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
N->getMemOperand());
+ if (N->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
+ if (ETy == ISD::NON_EXTLOAD) {
+ if (TLI.getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ ETy = ISD::SEXTLOAD;
+ else if (TLI.getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ ETy = ISD::ZEXTLOAD;
+ else
+ ETy = ISD::EXTLOAD;
+ }
+ cast<AtomicSDNode>(Res)->setExtensionType(ETy);
+ }
+
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 01d31806c8442f..de366bf13d6d51 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4017,6 +4017,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Op.getResNo() == 0) {
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
+ else if (Op->getOpcode() == ISD::ATOMIC_LOAD &&
+ cast<AtomicSDNode>(Op)->getExtensionType() == ISD::ZEXTLOAD)
+ Known.Zero.setBitsFrom(MemBits);
}
break;
}
@@ -4828,6 +4831,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits - Tmp + 1;
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
return VTBits - Tmp;
+ if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(Op)->getExtensionType();
+ if (ETy == ISD::SEXTLOAD)
+ return VTBits - Tmp + 1;
+ if (ETy == ISD::ZEXTLOAD)
+ return VTBits - Tmp;
+ }
}
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9ebef642e4232f..79c4d9734e940c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -828,6 +828,18 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
+ if (auto *A = dyn_cast<AtomicSDNode>(M))
+ if (A->getOpcode() == ISD::ATOMIC_LOAD) {
+ bool doExt = true;
+ switch (A->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << A->getMemoryVT();
+ }
OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 815eca1240d827..f4e6081ae82104 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -347,6 +347,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
SDValue expandSelectBoolean(SDNode *Node);
+ // Convert ATOMIC_LOADs to LOADs to facilitate instruction selection.
+ void convertATOMIC_LOADs(SDNode *Node, unsigned Depth = 0);
+
public:
static char ID;
@@ -1513,6 +1516,10 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");
+ // These instructions are not atomic.
+ if (MMO->isAtomic())
+ return false;
+
// The memory access must have a proper alignment and no index register.
if (MemAccess->getAlign().value() < StoreSize ||
!MemAccess->getOffset().isUndef())
@@ -1545,6 +1552,37 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
return true;
}
+// This is a hack to convert ATOMIC_LOADs to LOADs in the last minute just
+// before instruction selection begins. It would have been easier if
+// ATOMIC_LOAD nodes would instead always be built by SelectionDAGBuilder as
+// LOADs with an atomic MMO and properly handled as such in DAGCombiner, but
+// until that changes they need to remain as ATOMIC_LOADs until all
+// DAGCombining is done. Convert Node or any of its operands from
+// ATOMIC_LOAD to LOAD.
+void SystemZDAGToDAGISel::convertATOMIC_LOADs(SDNode *Node, unsigned Depth) {
+ if (Depth > 1) // Chain operands are also followed so this seems enough.
+ return;
+ if (Node->getOpcode() == ISD::ATOMIC_LOAD) {
+ auto *ALoad = cast<AtomicSDNode>(Node);
+ // It seems necessary to morph the node as it is not yet being selected.
+ LoadSDNode *Ld = cast<LoadSDNode>(CurDAG->MorphNodeTo(
+ ALoad, ISD::LOAD, CurDAG->getVTList(ALoad->getValueType(0), MVT::Other),
+ {ALoad->getChain(), ALoad->getBasePtr()}));
+ // Sanity check the morph. The extension type for an extending load
+ // should have been set prior to instruction selection and remain in the
+ // morphed node.
+ assert(((SDNode *)Ld) == ((SDNode *)ALoad) && "Bad CSE on atomic load.");
+ assert(Ld->getMemOperand()->isAtomic() && "Broken MMO.");
+ ISD::LoadExtType ETy = Ld->getExtensionType();
+ bool IsNonExt = Ld->getMemoryVT().getSizeInBits() ==
+ Ld->getValueType(0).getSizeInBits();
+ assert(IsNonExt == (ETy == ISD::NON_EXTLOAD) && "Bad extension type.");
+ return;
+ }
+ for (SDValue Op : Node->ops())
+ convertATOMIC_LOADs(Op.getNode(), ++Depth);
+}
+
void SystemZDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -1553,6 +1591,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ // Prepare any ATOMIC_LOAD to be selected as a LOAD with an atomic MMO.
+ convertATOMIC_LOADs(Node);
+
unsigned Opcode = Node->getOpcode();
switch (Opcode) {
case ISD::OR:
@@ -1742,6 +1783,31 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+
+ case ISD::ATOMIC_STORE: {
+ auto *AtomOp = cast<AtomicSDNode>(Node);
+ // Store FP values directly without first moving to a GPR.
+ EVT SVT = AtomOp->getMemoryVT();
+ SDValue StoredVal = AtomOp->getVal();
+ if (SVT.isInteger() && StoredVal->getOpcode() == ISD::BITCAST &&
+ StoredVal->getOperand(0).getValueType().isFloatingPoint()) {
+ StoredVal = StoredVal->getOperand(0);
+ SVT = StoredVal.getValueType();
+ }
+ StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(
+ AtomOp->getChain(), SDLoc(AtomOp), StoredVal, AtomOp->getBasePtr(), SVT,
+ AtomOp->getMemOperand()));
+ assert(St->getMemOperand()->isAtomic() && "Broken MMO.");
+ SDNode *Chain = St;
+ // We have to enforce sequential consistency by performing a
+ // serialization operation after the store.
+ if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp),
+ MVT::Other, SDValue(Chain, 0));
+ ReplaceNode(Node, Chain);
+ SelectCode(St);
+ return;
+ }
}
SelectCode(Node);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2450c6801a6632..feec1d10efd5c0 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,11 +194,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDO_CARRY, VT, Custom);
setOperationAction(ISD::USUBO_CARRY, VT, Custom);
- // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
- // stores, putting a serialization instruction after the stores.
- setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
- setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
-
// Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
@@ -693,7 +688,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
// Codes for which we want to perform some z-specific combinations.
- setTargetDAGCombine({ISD::ZERO_EXTEND,
+ setTargetDAGCombine({ISD::BITCAST,
+ ISD::ZERO_EXTEND,
ISD::SIGN_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::LOAD,
@@ -913,6 +909,22 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
return false;
}
+TargetLowering::AtomicExpansionKind
+SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ // Lower fp128 the same way as i128.
+ if (LI->getType()->isFP128Ty())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
+TargetLowering::AtomicExpansionKind
+SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const {
+ // Lower fp128 the same way as i128.
+ if (SI->getValueOperand()->getType()->isFP128Ty())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// Don't expand subword operations as they require special treatment.
@@ -4495,40 +4507,14 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-// Op is an atomic load. Lower it into a normal volatile load.
-SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue SystemZTargetLowering::lowerATOMIC_I128_LDST(SDValue Op,
+ SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
- if (Node->getMemoryVT() == MVT::i128) {
- // Use same code to handle both legal and non-legal i128 types.
- SmallVector<SDValue, 2> Results;
- LowerOperationWrapper(Node, Results, DAG);
- return DAG.getMergeValues(Results, SDLoc(Op));
- }
- return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
- Node->getChain(), Node->getBasePtr(),
- Node->getMemoryVT(), Node->getMemOperand());
-}
-
-// Op is an atomic store. Lower it into a normal volatile store.
-SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
- SelectionDAG &DAG) const {
- auto *Node = cast<AtomicSDNode>(Op.getNode());
- if (Node->getMemoryVT() == MVT::i128) {
- // Use same code to handle both legal and non-legal i128 types.
- SmallVector<SDValue, 1> Results;
- LowerOperationWrapper(Node, Results, DAG);
- return DAG.getMergeValues(Results, SDLoc(Op));
- }
- SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
- Node->getBasePtr(), Node->getMemoryVT(),
- Node->getMemOperand());
- // We have to enforce sequential consistency by performing a
- // serialization operation after the store.
- if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
- Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
- MVT::Other, Chain), 0);
- return Chain;
+ assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
+ // Use same code to handle both legal and non-legal i128 types.
+ SmallVector<SDValue, 2> Results;
+ LowerOperationWrapper(Node, Results, DAG);
+ return DAG.getMergeValues(Results, SDLoc(Op));
}
// Prepare for a Compare And Swap for a subword operation. This needs to be
@@ -5651,9 +5637,13 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
-bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op, EVT VecVT) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
+ if (auto *AL = dyn_cast<AtomicSDNode>(Op))
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD && SDValue(AL, 0).hasOneUse() &&
+ AL->getMemoryVT() == VecVT.getScalarType())
+ return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
return false;
@@ -5691,13 +5681,13 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single, VT)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (!isVectorElementLoad(Elem)) {
+ if (!isVectorElementLoad(Elem, VT)) {
AllLoads = false;
break;
}
@@ -5769,7 +5759,7 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (isVectorElementLoad(Elems[I])) {
+ if (isVectorElementLoad(Elems[I], VT)) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -6131,9 +6121,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
- return lowerATOMIC_STORE(Op, DAG);
case ISD::ATOMIC_LOAD:
- return lowerATOMIC_LOAD(Op, DAG);
+ return lowerATOMIC_I128_LDST(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
@@ -6580,6 +6569,52 @@ SDValue SystemZTargetLowering::combineTruncateExtract(
return SDValue();
}
+// Replace ALoad with a new ATOMIC_LOAD with a result that is extended to VT
+// per ETy.
+static SDValue extendAtomicLoad(AtomicSDNode *ALoad, EVT VT, SelectionDAG &DAG,
+ ISD::LoadExtType ETy) {
+ if (VT.getSizeInBits() > 64)
+ return SDValue();
+ EVT OrigVT = ALoad->getValueType(0);
+ assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
+ EVT MemoryVT = ALoad->getMemoryVT();
+ auto *NewALoad = dyn_cast<AtomicSDNode>(DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
+ ALoad->getBasePtr(), ALoad->getMemOperand()));
+ NewALoad->setExtensionType(ETy);
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(ALoad, 0),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
+ return SDValue(NewALoad, 0);
+}
+
+SDValue SystemZTargetLowering::combineBITCAST(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT InVT = N0.getValueType();
+ EVT ResVT = N->getValueType(0);
+ // Handle atomic loads to load float/double values directly and not via a
+ // GPR. Do it before legalization to help in treating the ATOMIC_LOAD the
+ // same way as a LOAD, and e.g. emit a REPLICATE.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && InVT.getSizeInBits() <= 64 &&
+ ALoad->getExtensionType() == ISD::NON_EXTLOAD &&
+ SDValue(ALoad, 0).hasOneUse() && InVT.isInteger() &&
+ ResVT.isFloatingPoint()) {
+ SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD, SDLoc(N), ResVT, ResVT,
+ ALoad->getChain(), ALoad->getBasePtr(),
+ ALoad->getMemOperand());
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), Res.getValue(1));
+ return Res;
+ }
+
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineZERO_EXTEND(
SDNode *N, DAGCombinerInfo &DCI) const {
// Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
@@ -6604,6 +6639,13 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
return NewSelect;
}
}
+
+ // Fold into ATOMIC_LOAD unless it is already sign extending.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
+ ALoad->getExtensionType() != ISD::SEXTLOAD)
+ return extendAtomicLoad(ALoad, VT, DAG, ISD::ZEXTLOAD);
+
return SDValue();
}
@@ -6655,6 +6697,13 @@ SDValue SystemZTargetLowering::combineSIGN_EXTEND(
}
}
}
+
+ // Fold into ATOMIC_LOAD unless it is already zero extending.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
+ ALoad->getExtensionType() != ISD::ZEXTLOAD)
+ return extendAtomicLoad(ALoad, VT, DAG, ISD::SEXTLOAD);
+
return SDValue();
}
@@ -7626,6 +7675,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
default: break;
+ case ISD::BITCAST: return combineBITCAST(N, DCI);
case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index baf4ba41654879..9c442268dbb111 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -474,6 +474,8 @@ class SystemZTargetLowering : public TargetLowering {
return VT != MVT::f64;
}
bool hasInlineStackProbe(const MachineFunction &MF) const override;
+ AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override;
+ AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
@@ -692,8 +694,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_I128_LDST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
@@ -703,7 +704,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isVectorElementLoad(SDValue Op) const;
+ bool isVectorElementLoad(SDValue Op, EVT VecVT) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -723,6 +724,7 @@ class SystemZTargetLowering : public TargetLowering {
bool Force) const;
SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
DAGCombinerInfo &DCI) const;
+ SDValue combineBITCAST(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index ea62e99a58399c..41d0042fdb489d 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, nonatomic_ld, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, nonatomic_ld, 8>;
}
// Fused multiply-subtract.
@@ -513,8 +513,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, nonatomic_ld, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, nonatomic_ld, 8>;
}
// Division.
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index d98bb886c18506..28815083daab0c 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -607,6 +607,10 @@ def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
+def nonatomic_ld : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !cast<LoadSDNode>(N)->isAtomic();
+}]>;
+
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
: PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
index c9c5504520345c..d75f15a574f7ef 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
@@ -4,9 +4,7 @@
define float @f1(ptr %src) {
; CHECK-LABEL: f1:
-; CHECK: lgf [[R:%r[0-9]+]], 0(%r2)
-; CHECK: sllg [[R]], [[R]], 32
-; CHECK: ldgr %f0, [[R]]
+; CHECK: le %f0
; CHECK: br %r14
%val = load atomic float, ptr %src seq_cst, align 4
ret float %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
new file mode 100644
index 00000000000000..56c1eb2b85a8d5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
@@ -0,0 +1,723 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s
+
+; Sign-extending atomic loads.
+define void @f1(ptr %src, ptr %dst) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i16
+ store volatile i16 %s, ptr %dst
+ ret void
+}
+
+define void @f2(ptr %src, ptr %dst) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i32
+ store volatile i32 %s, ptr %dst
+ ret void
+}
+
+define void @f3(ptr %src, ptr %dst) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgb %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+define void @f4(ptr %src, ptr %dst) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lh %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %s = sext i16 %b to i32
+ store volatile i32 %s, ptr %dst
+ ret void
+}
+
+define void @f5(ptr %src, ptr %dst) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgh %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %s = sext i16 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+define void @f6(ptr %src, ptr %dst) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgf %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %s = sext i32 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+; Zero-extending atomic loads.
+define void @f7(ptr %src, ptr %dst) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llc %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i16
+ store volatile i16 %z, ptr %dst
+ ret void
+}
+
+define void @f8(ptr %src, ptr %dst) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llc %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i32
+ store volatile i32 %z, ptr %dst
+ ret void
+}
+
+define void @f9(ptr %src, ptr %dst) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+define void @f10(ptr %src, ptr %dst) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llh %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %z = zext i16 %b to i32
+ store volatile i32 %z, ptr %dst
+ ret void
+}
+
+define void @f11(ptr %src, ptr %dst) {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgh %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %z = zext i16 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+define void @f12(ptr %src, ptr %dst) {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgf %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %z = zext i32 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+; reg/mem
+define i64 @f13(i64 %a, ptr %src) {
+; CHECK-LABEL: f13:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ag %r2, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i64, ptr %src seq_cst, align 8
+ %add = add i64 %a, %b
+ ret i64 %add
+}
+
+; reg/mem op with extension from memory.
+define i64 @f14(i64 %a, ptr %src) {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slgf %r2, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %bext = zext i32 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check that maeb (reg/mem) is *not* used for an atomic load.
+define float @f15(float %f1, ptr %ptr, float %acc) {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lde %f1, 0(%r2)
+; CHECK-NEXT: wfmasb %f0, %f0, %f1, %f2
+; CHECK-NEXT: br %r14
+ %f2 = load atomic float, ptr %ptr seq_cst, align 4
+ %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+ ret float %res
+}
+declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
+
+; Do it twice for good measure given the involved DAG combines.
+define void @f16(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: lgbr %r1, %r0
+; CHECK-NEXT: stg %r1, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: lgbr %r1, %r0
+; CHECK-NEXT: stg %r1, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i64
+ %z = zext i8 %b to i64
+ store volatile i64 %s, ptr %dst
+ store volatile i64 %z, ptr %dst
+
+ %b2 = load atomic i8, ptr %src seq_cst, align 1
+ %s2 = sext i8 %b2 to i64
+ %z2 = zext i8 %b2 to i64
+ store volatile i64 %s2, ptr %dst
+ store volatile i64 %z2, ptr %dst
+
+ ret void
+}
+
+define void @f16_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgb %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i16
+ store volatile i16 %s, ptr %dst
+
+ %s2 = sext i8 %b to i64
+ store volatile i64 %s2, ptr %dst
+
+ ret void
+}
+
+define void @f16_c(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i16
+ store volatile i16 %z, ptr %dst
+
+ %z2 = zext i8 %b to i64
+ store volatile i64 %z2, ptr %dst
+
+ ret void
+}
+
+; Check that two i8 loads use a reg/reg op.
+define i8 @f16_d(ptr %src, ptr %src2) {
+; CHECK-LABEL: f16_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r2, 0(%r2)
+; CHECK-NEXT: lb %r0, 0(%r3)
+; CHECK-NEXT: ar %r2, %r0
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %b2 = load atomic i8, ptr %src2 seq_cst, align 1
+ %add = add i8 %b, %b2
+ ret i8 %add
+}
+
+; Binary operations on a byte in memory, with an atomic load.
+define void @f17(ptr %ptr) {
+; CHECK-LABEL: f17:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ni 0(%r2), 1
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = and i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+define void @f18(ptr %src) {
+; CHECK-LABEL: f18:
+; CHECK: # %bb.0:
+; CHECK-NEXT: oiy 4096(%r2), 1
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i8, ptr %src, i64 4096
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = or i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+define void @f19(ptr %src) {
+; CHECK-LABEL: f19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xi 4095(%r2), 1
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i8, ptr %src, i64 4095
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = xor i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+; TM
+define double @f20(ptr %src, double %a, double %b) {
+; CHECK-LABEL: f20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: tm 0(%r2), 1
+; CHECK-NEXT: je .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ldr %f2, %f0
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: br %r14
+ %byte = load atomic i8, ptr %src seq_cst, align 1
+ %and = and i8 %byte, 1
+ %cmp = icmp eq i8 %and, 0
+ %res = select i1 %cmp, double %b, double %a
+ ret double %res
+}
+
+; vector load and replicate
+define void @f21(ptr %src, ptr %dst) {
+; CHECK-LABEL: f21:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepb %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %v = insertelement <16 x i8> undef, i8 %b, i32 1
+ store volatile <16 x i8> %v, ptr %dst
+ ret void
+}
+
+define void @f22(ptr %src, ptr %dst) {
+; CHECK-LABEL: f22:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlreph %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %v = insertelement <8 x i16> undef, i16 %b, i32 1
+ store volatile <8 x i16> %v, ptr %dst
+ ret void
+}
+
+define void @f23(ptr %src, ptr %dst) {
+; CHECK-LABEL: f23:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %v = insertelement <4 x i32> undef, i32 %b, i32 2
+ store volatile <4 x i32> %v, ptr %dst
+ ret void
+}
+
+define void @f24(ptr %src, ptr %dst) {
+; CHECK-LABEL: f24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i64, ptr %src seq_cst, align 8
+ %v = insertelement <2 x i64> undef, i64 %b, i32 0
+ store volatile <2 x i64> %v, ptr %dst
+ ret void
+}
+
+define void @f25(ptr %src, ptr %dst) {
+; CHECK-LABEL: f25:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic float, ptr %src seq_cst, align 4
+ %v = insertelement <4 x float> undef, float %b, i32 1
+ store volatile <4 x float> %v, ptr %dst
+ ret void
+}
+
+define void @f25_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f25_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %l = load atomic i32, ptr %src seq_cst, align 4
+ %b = bitcast i32 %l to float
+ %v = insertelement <4 x float> undef, float %b, i32 1
+ store volatile <4 x float> %v, ptr %dst
+ ret void
+}
+
+define void @f26(ptr %src, ptr %dst) {
+; CHECK-LABEL: f26:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic double, ptr %src seq_cst, align 8
+ %v = insertelement <2 x double> undef, double %b, i32 0
+ store volatile <2 x double> %v, ptr %dst
+ ret void
+}
+
+define void @f26_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f26_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %l = load atomic i64, ptr %src seq_cst, align 8
+ %b = bitcast i64 %l to double
+ %v = insertelement <2 x double> undef, double %b, i32 0
+ store volatile <2 x double> %v, ptr %dst
+ ret void
+}
+
+; Vector Load logical element and zero.
+define <16 x i8> @f27(ptr %ptr) {
+; CHECK-LABEL: f27:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezb %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @f28(ptr %ptr) {
+; CHECK-LABEL: f28:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezh %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i16, ptr %ptr seq_cst, align 2
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @f29(ptr %ptr) {
+; CHECK-LABEL: f29:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @f30(ptr %ptr) {
+; CHECK-LABEL: f30:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezg %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i64, ptr %ptr seq_cst, align 8
+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+define <4 x i32> @f31(ptr %ptr) {
+; CHECK-LABEL: f31:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezlf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+define <4 x float> @f32(ptr %ptr) {
+; CHECK-LABEL: f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezlf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic float, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+ ret <4 x float> %ret
+}
+
+; Vector Load element.
+define <16 x i8> @f33(<16 x i8> %val, ptr %ptr) {
+; CHECK-LABEL: f33:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleb %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i8, ptr %ptr seq_cst, align 1
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @f34(<8 x i16> %val, ptr %ptr) {
+; CHECK-LABEL: f34:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleh %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i16, ptr %ptr seq_cst, align 2
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @f35(<4 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: f35:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlef %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @f36(<2 x i64> %val, ptr %ptr) {
+; CHECK-LABEL: f36:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleg %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i64, ptr %ptr seq_cst, align 8
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test that fp values are loaded/stored directly. Clang FE currently always
+; emits atomic load/stores casted this way.
+define void @f37(ptr %src, ptr %dst) {
+; CHECK-LABEL: f37:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld %f0, 0(%r2)
+; CHECK-NEXT: adbr %f0, %f0
+; CHECK-NEXT: std %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i64, ptr %src seq_cst, align 8
+ %bc0 = bitcast i64 %atomic-load to double
+ %fa = fadd double %bc0, %bc0
+ %bc1 = bitcast double %fa to i64
+ store atomic i64 %bc1, ptr %dst seq_cst, align 8
+ ret void
+}
+
+define void @f38(ptr %src, ptr %dst) {
+; CHECK-LABEL: f38:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lde %f0, 0(%r2)
+; CHECK-NEXT: aebr %f0, %f0
+; CHECK-NEXT: ste %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i32, ptr %src seq_cst, align 8
+ %bc0 = bitcast i32 %atomic-load to float
+ %fa = fadd float %bc0, %bc0
+ %bc1 = bitcast float %fa to i32
+ store atomic i32 %bc1, ptr %dst seq_cst, align 8
+ ret void
+}
+
+; Test operation on memory involving atomic load and store.
+define void @f39(ptr %ptr) {
+; CHECK-LABEL: f39:
+; CHECK: # %bb.0:
+; CHECK-NEXT: oi 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %or = or i8 %val, -255
+ store atomic i8 %or, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+; Some atomic stores of immediates.
+define void @f40(ptr %ptr) {
+; CHECK-LABEL: f40:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvi 0(%r2), 128
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i8 128, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+define void @f41(ptr %ptr) {
+; CHECK-LABEL: f41:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvhi 0(%r2), -1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i32 4294967295, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f42(ptr %ptr) {
+; CHECK-LABEL: f42:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvhi 0(%r2), -1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i32 4294967295, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f43(ptr %ptr) {
+; CHECK-LABEL: f43:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llihl %r0, 255
+; CHECK-NEXT: oilf %r0, 4294967295
+; CHECK-NEXT: stg %r0, 0(%r2)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i64 1099511627775, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f44(ptr %ptr) {
+; CHECK-LABEL: f44:
+; CHECK: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI48_0
+; CHECK-NEXT: ld %f0, 0(%r1)
+; CHECK-NEXT: std %f0, 0(%r2)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic double 0x3ff0000020000000, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+; Vector Store Element.
+define void @f45(<16 x i8> %val, ptr %ptr) {
+; CHECK-LABEL: f45:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteb %v24, 0(%r2), 0
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <16 x i8> %val, i32 0
+ store atomic i8 %element, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+define void @f46(<8 x i16> %val, ptr %base) {
+; CHECK-LABEL: f46:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteh %v24, 4094(%r2), 5
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i16, ptr %base, i32 2047
+ %element = extractelement <8 x i16> %val, i32 5
+ store atomic i16 %element, ptr %ptr seq_cst, align 2
+ ret void
+}
+
+define void @f47(<4 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: f47:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vstef %v24, 0(%r2), 3
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <4 x i32> %val, i32 3
+ store atomic i32 %element, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f48(<2 x i64> %val, ptr %ptr) {
+; CHECK-LABEL: f48:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteg %v24, 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <2 x i64> %val, i32 1
+ store atomic i64 %element, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f49(<4 x float> %val, ptr %ptr) {
+; CHECK-LABEL: f49:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vstef %v24, 0(%r2), 0
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <4 x float> %val, i32 0
+ store atomic float %element, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f50(<2 x double> %val, ptr %ptr) {
+; CHECK-LABEL: f50:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteg %v24, 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <2 x double> %val, i32 1
+ store atomic double %element, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f51(ptr %src, ptr %dst) {
+; CHECK-LABEL: f51:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpq %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: vgmf %v1, 2, 8
+; CHECK-NEXT: aebr %f0, %f1
+; CHECK-NEXT: ste %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i128, ptr %src seq_cst, align 16
+ %b0 = bitcast i128 %atomic-load to <4 x float>
+ %vecext = extractelement <4 x float> %b0, i64 0
+ %add = fadd float %vecext, 1.000000e+00
+ %b1 = bitcast float %add to i32
+ store atomic i32 %b1, ptr %dst seq_cst, align 4
+ ret void
+}
+
+define void @f52(ptr %src, ptr %dst) {
+; CHECK-LABEL: f52:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpq %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: vgmg %v1, 2, 11
+; CHECK-NEXT: adbr %f0, %f1
+; CHECK-NEXT: std %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i128, ptr %src seq_cst, align 16
+ %b0 = bitcast i128 %atomic-load to <2 x double>
+ %vecext = extractelement <2 x double> %b0, i64 0
+ %add = fadd double %vecext, 1.000000e+00
+ %b1 = bitcast double %add to i64
+ store atomic i64 %b1, ptr %dst seq_cst, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
index b748bfc767a4db..91e324b0af1a97 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
@@ -6,10 +6,7 @@
define void @f1(ptr %src, float %val) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0s killed $f0s def $f0d
-; CHECK-NEXT: lgdr %r0, %f0
-; CHECK-NEXT: srlg %r0, %r0, 32
-; CHECK-NEXT: st %r0, 0(%r2)
+; CHECK-NEXT: ste %f0, 0(%r2)
; CHECK-NEXT: bcr 15, %r0
; CHECK-NEXT: br %r14
store atomic float %val, ptr %src seq_cst, align 4
From 27f7015898efd6ef5e0c0cecfb95e1bc639c1970 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Tue, 9 Jan 2024 13:40:12 -0600
Subject: [PATCH 2/2] Updates after review
---
.../SelectionDAG/LegalizeIntegerTypes.cpp | 13 ++++--
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 7 +---
.../Target/SystemZ/SystemZISelLowering.cpp | 15 ++++---
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 4 +-
llvm/lib/Target/SystemZ/SystemZInstrFP.td | 8 ++--
llvm/lib/Target/SystemZ/SystemZOperators.td | 4 --
llvm/test/CodeGen/SystemZ/atomic-memofolds.ll | 42 +++++++++++++++++--
7 files changed, 63 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f444f3010b0d8b..40e005e279776c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -343,12 +343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
if (N->getOpcode() == ISD::ATOMIC_LOAD) {
ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
if (ETy == ISD::NON_EXTLOAD) {
- if (TLI.getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
ETy = ISD::SEXTLOAD;
- else if (TLI.getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ break;
+ case ISD::ZERO_EXTEND:
ETy = ISD::ZEXTLOAD;
- else
+ break;
+ case ISD::ANY_EXTEND:
ETy = ISD::EXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
}
cast<AtomicSDNode>(Res)->setExtensionType(ETy);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index f4e6081ae82104..48b2999096fcd4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1516,13 +1516,10 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");
- // These instructions are not atomic.
- if (MMO->isAtomic())
- return false;
-
// The memory access must have a proper alignment and no index register.
+ // ATOMIC_LOADs do not have the offset operand.
if (MemAccess->getAlign().value() < StoreSize ||
- !MemAccess->getOffset().isUndef())
+ (!MMO->isAtomic() && !MemAccess->getOffset().isUndef()))
return false;
// The MMO must not have an unaligned offset.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index feec1d10efd5c0..9ab54dfefdce01 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4507,7 +4507,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-SDValue SystemZTargetLowering::lowerATOMIC_I128_LDST(SDValue Op,
+SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
@@ -5637,12 +5637,11 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
-bool SystemZTargetLowering::isVectorElementLoad(SDValue Op, EVT VecVT) const {
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
if (auto *AL = dyn_cast<AtomicSDNode>(Op))
- if (AL->getOpcode() == ISD::ATOMIC_LOAD && SDValue(AL, 0).hasOneUse() &&
- AL->getMemoryVT() == VecVT.getScalarType())
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD)
return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
@@ -5681,13 +5680,13 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single, VT)))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (!isVectorElementLoad(Elem, VT)) {
+ if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@@ -5759,7 +5758,7 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (isVectorElementLoad(Elems[I], VT)) {
+ if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -6122,7 +6121,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
case ISD::ATOMIC_LOAD:
- return lowerATOMIC_I128_LDST(Op, DAG);
+ return lowerATOMIC_LDST_I128(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 9c442268dbb111..e1ea069f30ba86 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -694,7 +694,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_I128_LDST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
@@ -704,7 +704,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isVectorElementLoad(SDValue Op, EVT VecVT) const;
+ bool isVectorElementLoad(SDValue Op) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 41d0042fdb489d..ea62e99a58399c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, nonatomic_ld, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, nonatomic_ld, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
@@ -513,8 +513,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, nonatomic_ld, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, nonatomic_ld, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 28815083daab0c..d98bb886c18506 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -607,10 +607,6 @@ def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
-def nonatomic_ld : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !cast<LoadSDNode>(N)->isAtomic();
-}]>;
-
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
: PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
index 56c1eb2b85a8d5..fa1578df04bec1 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
@@ -170,12 +170,11 @@ define i64 @f14(i64 %a, ptr %src) {
ret i64 %sub
}
-; Check that maeb (reg/mem) is *not* used for an atomic load.
define float @f15(float %f1, ptr %ptr, float %acc) {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
-; CHECK-NEXT: lde %f1, 0(%r2)
-; CHECK-NEXT: wfmasb %f0, %f0, %f1, %f2
+; CHECK-NEXT: maeb %f2, %f0, 0(%r2)
+; CHECK-NEXT: ldr %f0, %f2
; CHECK-NEXT: br %r14
%f2 = load atomic float, ptr %ptr seq_cst, align 4
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
@@ -387,6 +386,39 @@ define void @f25_b(ptr %src, ptr %dst) {
ret void
}
+; Do *not* use vlrep for an extending load.
+define <4 x i32> @f25_c(ptr %ptr) {
+; CHECK-LABEL: f25_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepf %v24, %v0, 1
+; CHECK-NEXT: br %r14
+ %L = load atomic i8, ptr %ptr seq_cst, align 4
+ %S = sext i8 %L to i32
+ %val = insertelement <4 x i32> undef, i32 %S, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Do *not* use vlrep if there is another scalar use.
+define <4 x i32> @f25_d(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: f25_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: l %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepf %v24, %v0, 1
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %L = load atomic i32, ptr %ptr seq_cst, align 4
+ store i32 %L, ptr %dst, align 4
+ %val = insertelement <4 x i32> undef, i32 %L, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
define void @f26(ptr %src, ptr %dst) {
; CHECK-LABEL: f26:
; CHECK: # %bb.0:
@@ -412,6 +444,8 @@ define void @f26_b(ptr %src, ptr %dst) {
ret void
}
+
+
; Vector Load logical element and zero.
define <16 x i8> @f27(ptr %ptr) {
; CHECK-LABEL: f27:
@@ -607,7 +641,7 @@ define void @f43(ptr %ptr) {
define void @f44(ptr %ptr) {
; CHECK-LABEL: f44:
; CHECK: # %bb.0:
-; CHECK-NEXT: larl %r1, .LCPI48_0
+; CHECK-NEXT: larl %r1, .LCPI50_0
; CHECK-NEXT: ld %f0, 0(%r1)
; CHECK-NEXT: std %f0, 0(%r2)
; CHECK-NEXT: bcr 14, %r0