[llvm] [SystemZ] Don't lower float/double ATOMIC_[LOAD|STORE] to [LOAD|STORE] (PR #75879)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 03:03:27 PST 2024
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/75879
From ddba819ca007082be59a9ffc1a96e98039c5b6fd Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Mon, 18 Dec 2023 18:49:11 -0600
Subject: [PATCH 1/3] [SystemZ] Don't lower ATOMIC_[LOAD|STORE] to [LOAD|STORE]
(Use PatFrags for loads.) Try to convert to LOAD in select() instead. Was
e20dad7
---
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 11 +
.../SelectionDAG/LegalizeIntegerTypes.cpp | 13 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 +
.../SelectionDAG/SelectionDAGDumper.cpp | 12 +
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 66 ++
.../Target/SystemZ/SystemZISelLowering.cpp | 140 ++--
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 8 +-
llvm/lib/Target/SystemZ/SystemZInstrFP.td | 8 +-
llvm/lib/Target/SystemZ/SystemZOperators.td | 4 +
llvm/test/CodeGen/SystemZ/atomic-load-06.ll | 4 +-
llvm/test/CodeGen/SystemZ/atomic-memofolds.ll | 723 ++++++++++++++++++
llvm/test/CodeGen/SystemZ/atomic-store-06.ll | 5 +-
12 files changed, 945 insertions(+), 59 deletions(-)
create mode 100644 llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
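For reference, the kind of IR this series is about (taken verbatim from the updated atomic-load-06.ll test below): with the patch, the atomic float load is expected to select a direct FP load (le) instead of going through a GPR via lgf/sllg/ldgr.

define float @f1(ptr %src) {
  %val = load atomic float, ptr %src seq_cst, align 4
  ret float %val
}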
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 3130f6c4dce598..d7468f936d2270 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -558,6 +558,7 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
+ friend class AtomicSDNode;
friend class VPLoadSDNode;
friend class VPStridedLoadSDNode;
friend class MaskedLoadSDNode;
@@ -1473,6 +1474,16 @@ class AtomicSDNode : public MemSDNode {
MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
}
+ void setExtensionType(ISD::LoadExtType ETy) {
+ assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads.");
+ LoadSDNodeBits.ExtTy = ETy;
+ }
+
+ ISD::LoadExtType getExtensionType() const {
+ assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads.");
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+ }
+
const SDValue &getBasePtr() const {
return getOpcode() == ISD::ATOMIC_STORE ? getOperand(2) : getOperand(1);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 39b7e061554141..7642b6f4160a7b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -340,6 +340,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
N->getMemOperand());
+ if (N->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
+ if (ETy == ISD::NON_EXTLOAD) {
+ if (TLI.getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ ETy = ISD::SEXTLOAD;
+ else if (TLI.getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ ETy = ISD::ZEXTLOAD;
+ else
+ ETy = ISD::EXTLOAD;
+ }
+ cast<AtomicSDNode>(Res)->setExtensionType(ETy);
+ }
+
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 55eee780d512c8..c38789d26e8e1f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4037,6 +4037,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Op.getResNo() == 0) {
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
Known.Zero.setBitsFrom(MemBits);
+ else if (Op->getOpcode() == ISD::ATOMIC_LOAD &&
+ cast<AtomicSDNode>(Op)->getExtensionType() == ISD::ZEXTLOAD)
+ Known.Zero.setBitsFrom(MemBits);
}
break;
}
@@ -4848,6 +4851,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits - Tmp + 1;
if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
return VTBits - Tmp;
+ if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
+ ISD::LoadExtType ETy = cast<AtomicSDNode>(Op)->getExtensionType();
+ if (ETy == ISD::SEXTLOAD)
+ return VTBits - Tmp + 1;
+ if (ETy == ISD::ZEXTLOAD)
+ return VTBits - Tmp;
+ }
}
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a28d834f0522f2..20d627b679b571 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -828,6 +828,18 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
+ if (auto *A = dyn_cast<AtomicSDNode>(M))
+ if (A->getOpcode() == ISD::ATOMIC_LOAD) {
+ bool doExt = true;
+ switch (A->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << A->getMemoryVT();
+ }
OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 815eca1240d827..f4e6081ae82104 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -347,6 +347,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
SDValue expandSelectBoolean(SDNode *Node);
+ // Convert ATOMIC_LOADs to LOADs to facilitate instruction selection.
+ void convertATOMIC_LOADs(SDNode *Node, unsigned Depth = 0);
+
public:
static char ID;
@@ -1513,6 +1516,10 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");
+ // These instructions are not atomic.
+ if (MMO->isAtomic())
+ return false;
+
// The memory access must have a proper alignment and no index register.
if (MemAccess->getAlign().value() < StoreSize ||
!MemAccess->getOffset().isUndef())
@@ -1545,6 +1552,37 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
return true;
}
+// This is a hack to convert ATOMIC_LOADs to LOADs at the last minute, just
+// before instruction selection begins. It would have been easier if
+// ATOMIC_LOAD nodes were instead always built by SelectionDAGBuilder as
+// LOADs with an atomic MMO and handled properly as such in DAGCombiner, but
+// until that changes they need to remain ATOMIC_LOADs until all
+// DAG combining is done. Convert Node or any of its operands from
+// ATOMIC_LOAD to LOAD.
+void SystemZDAGToDAGISel::convertATOMIC_LOADs(SDNode *Node, unsigned Depth) {
+ if (Depth > 1) // Chain operands are also followed so this seems enough.
+ return;
+ if (Node->getOpcode() == ISD::ATOMIC_LOAD) {
+ auto *ALoad = cast<AtomicSDNode>(Node);
+ // It seems necessary to morph the node as it is not yet being selected.
+ LoadSDNode *Ld = cast<LoadSDNode>(CurDAG->MorphNodeTo(
+ ALoad, ISD::LOAD, CurDAG->getVTList(ALoad->getValueType(0), MVT::Other),
+ {ALoad->getChain(), ALoad->getBasePtr()}));
+ // Sanity check the morph. The extension type for an extending load
+ // should have been set prior to instruction selection and remain in the
+ // morphed node.
+ assert(((SDNode *)Ld) == ((SDNode *)ALoad) && "Bad CSE on atomic load.");
+ assert(Ld->getMemOperand()->isAtomic() && "Broken MMO.");
+ ISD::LoadExtType ETy = Ld->getExtensionType();
+ bool IsNonExt = Ld->getMemoryVT().getSizeInBits() ==
+ Ld->getValueType(0).getSizeInBits();
+ assert(IsNonExt == (ETy == ISD::NON_EXTLOAD) && "Bad extension type.");
+ return;
+ }
+ for (SDValue Op : Node->ops())
+ convertATOMIC_LOADs(Op.getNode(), ++Depth);
+}
+
void SystemZDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -1553,6 +1591,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ // Prepare any ATOMIC_LOAD to be selected as a LOAD with an atomic MMO.
+ convertATOMIC_LOADs(Node);
+
unsigned Opcode = Node->getOpcode();
switch (Opcode) {
case ISD::OR:
@@ -1742,6 +1783,31 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+
+ case ISD::ATOMIC_STORE: {
+ auto *AtomOp = cast<AtomicSDNode>(Node);
+ // Store FP values directly without first moving to a GPR.
+ EVT SVT = AtomOp->getMemoryVT();
+ SDValue StoredVal = AtomOp->getVal();
+ if (SVT.isInteger() && StoredVal->getOpcode() == ISD::BITCAST &&
+ StoredVal->getOperand(0).getValueType().isFloatingPoint()) {
+ StoredVal = StoredVal->getOperand(0);
+ SVT = StoredVal.getValueType();
+ }
+ StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(
+ AtomOp->getChain(), SDLoc(AtomOp), StoredVal, AtomOp->getBasePtr(), SVT,
+ AtomOp->getMemOperand()));
+ assert(St->getMemOperand()->isAtomic() && "Broken MMO.");
+ SDNode *Chain = St;
+ // We have to enforce sequential consistency by performing a
+ // serialization operation after the store.
+ if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp),
+ MVT::Other, SDValue(Chain, 0));
+ ReplaceNode(Node, Chain);
+ SelectCode(St);
+ return;
+ }
}
SelectCode(Node);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index d92586f7d05d0d..39bd1aea6979c4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,11 +194,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDO_CARRY, VT, Custom);
setOperationAction(ISD::USUBO_CARRY, VT, Custom);
- // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
- // stores, putting a serialization instruction after the stores.
- setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
- setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
-
// Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
@@ -700,7 +695,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
// Codes for which we want to perform some z-specific combinations.
- setTargetDAGCombine({ISD::ZERO_EXTEND,
+ setTargetDAGCombine({ISD::BITCAST,
+ ISD::ZERO_EXTEND,
ISD::SIGN_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::LOAD,
@@ -920,6 +916,22 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
return false;
}
+TargetLowering::AtomicExpansionKind
+SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ // Lower fp128 the same way as i128.
+ if (LI->getType()->isFP128Ty())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
+TargetLowering::AtomicExpansionKind
+SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const {
+ // Lower fp128 the same way as i128.
+ if (SI->getValueOperand()->getType()->isFP128Ty())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// Don't expand subword operations as they require special treatment.
@@ -4503,40 +4515,14 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-// Op is an atomic load. Lower it into a normal volatile load.
-SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue SystemZTargetLowering::lowerATOMIC_I128_LDST(SDValue Op,
+ SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
- if (Node->getMemoryVT() == MVT::i128) {
- // Use same code to handle both legal and non-legal i128 types.
- SmallVector<SDValue, 2> Results;
- LowerOperationWrapper(Node, Results, DAG);
- return DAG.getMergeValues(Results, SDLoc(Op));
- }
- return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
- Node->getChain(), Node->getBasePtr(),
- Node->getMemoryVT(), Node->getMemOperand());
-}
-
-// Op is an atomic store. Lower it into a normal volatile store.
-SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
- SelectionDAG &DAG) const {
- auto *Node = cast<AtomicSDNode>(Op.getNode());
- if (Node->getMemoryVT() == MVT::i128) {
- // Use same code to handle both legal and non-legal i128 types.
- SmallVector<SDValue, 1> Results;
- LowerOperationWrapper(Node, Results, DAG);
- return DAG.getMergeValues(Results, SDLoc(Op));
- }
- SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
- Node->getBasePtr(), Node->getMemoryVT(),
- Node->getMemOperand());
- // We have to enforce sequential consistency by performing a
- // serialization operation after the store.
- if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
- Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
- MVT::Other, Chain), 0);
- return Chain;
+ assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
+ // Use same code to handle both legal and non-legal i128 types.
+ SmallVector<SDValue, 2> Results;
+ LowerOperationWrapper(Node, Results, DAG);
+ return DAG.getMergeValues(Results, SDLoc(Op));
}
// Prepare for a Compare And Swap for a subword operation. This needs to be
@@ -5659,9 +5645,13 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
-bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op, EVT VecVT) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
+ if (auto *AL = dyn_cast<AtomicSDNode>(Op))
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD && SDValue(AL, 0).hasOneUse() &&
+ AL->getMemoryVT() == VecVT.getScalarType())
+ return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
return false;
@@ -5699,13 +5689,13 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single, VT)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (!isVectorElementLoad(Elem)) {
+ if (!isVectorElementLoad(Elem, VT)) {
AllLoads = false;
break;
}
@@ -5777,7 +5767,7 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (isVectorElementLoad(Elems[I])) {
+ if (isVectorElementLoad(Elems[I], VT)) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -6138,9 +6128,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
- return lowerATOMIC_STORE(Op, DAG);
case ISD::ATOMIC_LOAD:
- return lowerATOMIC_LOAD(Op, DAG);
+ return lowerATOMIC_I128_LDST(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
@@ -6587,6 +6576,52 @@ SDValue SystemZTargetLowering::combineTruncateExtract(
return SDValue();
}
+// Replace ALoad with a new ATOMIC_LOAD with a result that is extended to VT
+// per ETy.
+static SDValue extendAtomicLoad(AtomicSDNode *ALoad, EVT VT, SelectionDAG &DAG,
+ ISD::LoadExtType ETy) {
+ if (VT.getSizeInBits() > 64)
+ return SDValue();
+ EVT OrigVT = ALoad->getValueType(0);
+ assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
+ EVT MemoryVT = ALoad->getMemoryVT();
+ auto *NewALoad = dyn_cast<AtomicSDNode>(DAG.getAtomic(
+ ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
+ ALoad->getBasePtr(), ALoad->getMemOperand()));
+ NewALoad->setExtensionType(ETy);
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(ALoad, 0),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
+ return SDValue(NewALoad, 0);
+}
+
+SDValue SystemZTargetLowering::combineBITCAST(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT InVT = N0.getValueType();
+ EVT ResVT = N->getValueType(0);
+  // Handle atomic loads so that float/double values are loaded directly and
+  // not via a GPR. Do this before legalization, which helps to treat the
+  // ATOMIC_LOAD the same way as a LOAD and e.g. emit a REPLICATE.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && InVT.getSizeInBits() <= 64 &&
+ ALoad->getExtensionType() == ISD::NON_EXTLOAD &&
+ SDValue(ALoad, 0).hasOneUse() && InVT.isInteger() &&
+ ResVT.isFloatingPoint()) {
+ SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD, SDLoc(N), ResVT, ResVT,
+ ALoad->getChain(), ALoad->getBasePtr(),
+ ALoad->getMemOperand());
+ // Update the chain uses.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), Res.getValue(1));
+ return Res;
+ }
+
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineZERO_EXTEND(
SDNode *N, DAGCombinerInfo &DCI) const {
// Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
@@ -6611,6 +6646,13 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
return NewSelect;
}
}
+
+ // Fold into ATOMIC_LOAD unless it is already sign extending.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
+ ALoad->getExtensionType() != ISD::SEXTLOAD)
+ return extendAtomicLoad(ALoad, VT, DAG, ISD::ZEXTLOAD);
+
return SDValue();
}
@@ -6662,6 +6704,13 @@ SDValue SystemZTargetLowering::combineSIGN_EXTEND(
}
}
}
+
+ // Fold into ATOMIC_LOAD unless it is already zero extending.
+ if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
+ if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
+ ALoad->getExtensionType() != ISD::ZEXTLOAD)
+ return extendAtomicLoad(ALoad, VT, DAG, ISD::SEXTLOAD);
+
return SDValue();
}
@@ -7633,6 +7682,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
default: break;
+ case ISD::BITCAST: return combineBITCAST(N, DCI);
case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index baf4ba41654879..9c442268dbb111 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -474,6 +474,8 @@ class SystemZTargetLowering : public TargetLowering {
return VT != MVT::f64;
}
bool hasInlineStackProbe(const MachineFunction &MF) const override;
+ AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override;
+ AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
@@ -692,8 +694,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_I128_LDST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
@@ -703,7 +704,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isVectorElementLoad(SDValue Op) const;
+ bool isVectorElementLoad(SDValue Op, EVT VecVT) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -723,6 +724,7 @@ class SystemZTargetLowering : public TargetLowering {
bool Force) const;
SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
DAGCombinerInfo &DCI) const;
+ SDValue combineBITCAST(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 6e67425c1e788b..8cafa0a936a404 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -495,8 +495,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, nonatomic_ld, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, nonatomic_ld, 8>;
}
// Fused multiply-subtract.
@@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, nonatomic_ld, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, nonatomic_ld, 8>;
}
// Division.
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index d98bb886c18506..28815083daab0c 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -607,6 +607,10 @@ def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
+def nonatomic_ld : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !cast<LoadSDNode>(N)->isAtomic();
+}]>;
+
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
: PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
index c9c5504520345c..d75f15a574f7ef 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
@@ -4,9 +4,7 @@
define float @f1(ptr %src) {
; CHECK-LABEL: f1:
-; CHECK: lgf [[R:%r[0-9]+]], 0(%r2)
-; CHECK: sllg [[R]], [[R]], 32
-; CHECK: ldgr %f0, [[R]]
+; CHECK: le %f0
; CHECK: br %r14
%val = load atomic float, ptr %src seq_cst, align 4
ret float %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
new file mode 100644
index 00000000000000..56c1eb2b85a8d5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
@@ -0,0 +1,723 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s
+
+; Sign-extending atomic loads.
+define void @f1(ptr %src, ptr %dst) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i16
+ store volatile i16 %s, ptr %dst
+ ret void
+}
+
+define void @f2(ptr %src, ptr %dst) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i32
+ store volatile i32 %s, ptr %dst
+ ret void
+}
+
+define void @f3(ptr %src, ptr %dst) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgb %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+define void @f4(ptr %src, ptr %dst) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lh %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %s = sext i16 %b to i32
+ store volatile i32 %s, ptr %dst
+ ret void
+}
+
+define void @f5(ptr %src, ptr %dst) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgh %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %s = sext i16 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+define void @f6(ptr %src, ptr %dst) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgf %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %s = sext i32 %b to i64
+ store volatile i64 %s, ptr %dst
+ ret void
+}
+
+; Zero-extending atomic loads.
+define void @f7(ptr %src, ptr %dst) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llc %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i16
+ store volatile i16 %z, ptr %dst
+ ret void
+}
+
+define void @f8(ptr %src, ptr %dst) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llc %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i32
+ store volatile i32 %z, ptr %dst
+ ret void
+}
+
+define void @f9(ptr %src, ptr %dst) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+define void @f10(ptr %src, ptr %dst) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llh %r0, 0(%r2)
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %z = zext i16 %b to i32
+ store volatile i32 %z, ptr %dst
+ ret void
+}
+
+define void @f11(ptr %src, ptr %dst) {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgh %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %z = zext i16 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+define void @f12(ptr %src, ptr %dst) {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgf %r0, 0(%r2)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %z = zext i32 %b to i64
+ store volatile i64 %z, ptr %dst
+ ret void
+}
+
+; reg/mem
+define i64 @f13(i64 %a, ptr %src) {
+; CHECK-LABEL: f13:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ag %r2, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i64, ptr %src seq_cst, align 8
+ %add = add i64 %a, %b
+ ret i64 %add
+}
+
+; reg/mem op with extension from memory.
+define i64 @f14(i64 %a, ptr %src) {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slgf %r2, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %bext = zext i32 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check that maeb (reg/mem) is *not* used for an atomic load.
+define float @f15(float %f1, ptr %ptr, float %acc) {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lde %f1, 0(%r2)
+; CHECK-NEXT: wfmasb %f0, %f0, %f1, %f2
+; CHECK-NEXT: br %r14
+ %f2 = load atomic float, ptr %ptr seq_cst, align 4
+ %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+ ret float %res
+}
+declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
+
+; Do it twice for good measure given the involved DAG combines.
+define void @f16(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: lgbr %r1, %r0
+; CHECK-NEXT: stg %r1, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: lgbr %r1, %r0
+; CHECK-NEXT: stg %r1, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i64
+ %z = zext i8 %b to i64
+ store volatile i64 %s, ptr %dst
+ store volatile i64 %z, ptr %dst
+
+ %b2 = load atomic i8, ptr %src seq_cst, align 1
+ %s2 = sext i8 %b2 to i64
+ %z2 = zext i8 %b2 to i64
+ store volatile i64 %s2, ptr %dst
+ store volatile i64 %z2, ptr %dst
+
+ ret void
+}
+
+define void @f16_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgb %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %s = sext i8 %b to i16
+ store volatile i16 %s, ptr %dst
+
+ %s2 = sext i8 %b to i64
+ store volatile i64 %s2, ptr %dst
+
+ ret void
+}
+
+define void @f16_c(ptr %src, ptr %dst) {
+; CHECK-LABEL: f16_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgc %r0, 0(%r2)
+; CHECK-NEXT: sth %r0, 0(%r3)
+; CHECK-NEXT: stg %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %z = zext i8 %b to i16
+ store volatile i16 %z, ptr %dst
+
+ %z2 = zext i8 %b to i64
+ store volatile i64 %z2, ptr %dst
+
+ ret void
+}
+
+; Check that two i8 loads use a reg/reg op.
+define i8 @f16_d(ptr %src, ptr %src2) {
+; CHECK-LABEL: f16_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r2, 0(%r2)
+; CHECK-NEXT: lb %r0, 0(%r3)
+; CHECK-NEXT: ar %r2, %r0
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %b2 = load atomic i8, ptr %src2 seq_cst, align 1
+ %add = add i8 %b, %b2
+ ret i8 %add
+}
+
+; Binary operations on a byte in memory, with an atomic load.
+define void @f17(ptr %ptr) {
+; CHECK-LABEL: f17:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ni 0(%r2), 1
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = and i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+define void @f18(ptr %src) {
+; CHECK-LABEL: f18:
+; CHECK: # %bb.0:
+; CHECK-NEXT: oiy 4096(%r2), 1
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i8, ptr %src, i64 4096
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = or i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+define void @f19(ptr %src) {
+; CHECK-LABEL: f19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xi 4095(%r2), 1
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i8, ptr %src, i64 4095
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %xor = xor i8 %val, -255
+ store i8 %xor, ptr %ptr
+ ret void
+}
+
+; TM
+define double @f20(ptr %src, double %a, double %b) {
+; CHECK-LABEL: f20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: tm 0(%r2), 1
+; CHECK-NEXT: je .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ldr %f2, %f0
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: ldr %f0, %f2
+; CHECK-NEXT: br %r14
+ %byte = load atomic i8, ptr %src seq_cst, align 1
+ %and = and i8 %byte, 1
+ %cmp = icmp eq i8 %and, 0
+ %res = select i1 %cmp, double %b, double %a
+ ret double %res
+}
+
+; vector load and replicate
+define void @f21(ptr %src, ptr %dst) {
+; CHECK-LABEL: f21:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepb %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i8, ptr %src seq_cst, align 1
+ %v = insertelement <16 x i8> undef, i8 %b, i32 1
+ store volatile <16 x i8> %v, ptr %dst
+ ret void
+}
+
+define void @f22(ptr %src, ptr %dst) {
+; CHECK-LABEL: f22:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlreph %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i16, ptr %src seq_cst, align 2
+ %v = insertelement <8 x i16> undef, i16 %b, i32 1
+ store volatile <8 x i16> %v, ptr %dst
+ ret void
+}
+
+define void @f23(ptr %src, ptr %dst) {
+; CHECK-LABEL: f23:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i32, ptr %src seq_cst, align 4
+ %v = insertelement <4 x i32> undef, i32 %b, i32 2
+ store volatile <4 x i32> %v, ptr %dst
+ ret void
+}
+
+define void @f24(ptr %src, ptr %dst) {
+; CHECK-LABEL: f24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic i64, ptr %src seq_cst, align 8
+ %v = insertelement <2 x i64> undef, i64 %b, i32 0
+ store volatile <2 x i64> %v, ptr %dst
+ ret void
+}
+
+define void @f25(ptr %src, ptr %dst) {
+; CHECK-LABEL: f25:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic float, ptr %src seq_cst, align 4
+ %v = insertelement <4 x float> undef, float %b, i32 1
+ store volatile <4 x float> %v, ptr %dst
+ ret void
+}
+
+define void @f25_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f25_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %l = load atomic i32, ptr %src seq_cst, align 4
+ %b = bitcast i32 %l to float
+ %v = insertelement <4 x float> undef, float %b, i32 1
+ store volatile <4 x float> %v, ptr %dst
+ ret void
+}
+
+define void @f26(ptr %src, ptr %dst) {
+; CHECK-LABEL: f26:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %b = load atomic double, ptr %src seq_cst, align 8
+ %v = insertelement <2 x double> undef, double %b, i32 0
+ store volatile <2 x double> %v, ptr %dst
+ ret void
+}
+
+define void @f26_b(ptr %src, ptr %dst) {
+; CHECK-LABEL: f26_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r2)
+; CHECK-NEXT: vst %v0, 0(%r3), 3
+; CHECK-NEXT: br %r14
+ %l = load atomic i64, ptr %src seq_cst, align 8
+ %b = bitcast i64 %l to double
+ %v = insertelement <2 x double> undef, double %b, i32 0
+ store volatile <2 x double> %v, ptr %dst
+ ret void
+}
+
+; Vector Load logical element and zero.
+define <16 x i8> @f27(ptr %ptr) {
+; CHECK-LABEL: f27:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezb %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @f28(ptr %ptr) {
+; CHECK-LABEL: f28:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezh %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i16, ptr %ptr seq_cst, align 2
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @f29(ptr %ptr) {
+; CHECK-LABEL: f29:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @f30(ptr %ptr) {
+; CHECK-LABEL: f30:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezg %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i64, ptr %ptr seq_cst, align 8
+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+define <4 x i32> @f31(ptr %ptr) {
+; CHECK-LABEL: f31:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezlf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+define <4 x float> @f32(ptr %ptr) {
+; CHECK-LABEL: f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vllezlf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load atomic float, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+ ret <4 x float> %ret
+}
+
+; Vector Load element.
+define <16 x i8> @f33(<16 x i8> %val, ptr %ptr) {
+; CHECK-LABEL: f33:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleb %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i8, ptr %ptr seq_cst, align 1
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @f34(<8 x i16> %val, ptr %ptr) {
+; CHECK-LABEL: f34:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleh %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i16, ptr %ptr seq_cst, align 2
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @f35(<4 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: f35:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlef %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i32, ptr %ptr seq_cst, align 4
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @f36(<2 x i64> %val, ptr %ptr) {
+; CHECK-LABEL: f36:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vleg %v24, 0(%r2), 0
+; CHECK-NEXT: br %r14
+ %element = load atomic i64, ptr %ptr seq_cst, align 8
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test that FP values are loaded/stored directly. The Clang FE currently
+; always emits atomic loads/stores cast this way.
+define void @f37(ptr %src, ptr %dst) {
+; CHECK-LABEL: f37:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld %f0, 0(%r2)
+; CHECK-NEXT: adbr %f0, %f0
+; CHECK-NEXT: std %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i64, ptr %src seq_cst, align 8
+ %bc0 = bitcast i64 %atomic-load to double
+ %fa = fadd double %bc0, %bc0
+ %bc1 = bitcast double %fa to i64
+ store atomic i64 %bc1, ptr %dst seq_cst, align 8
+ ret void
+}
+
+define void @f38(ptr %src, ptr %dst) {
+; CHECK-LABEL: f38:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lde %f0, 0(%r2)
+; CHECK-NEXT: aebr %f0, %f0
+; CHECK-NEXT: ste %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i32, ptr %src seq_cst, align 8
+ %bc0 = bitcast i32 %atomic-load to float
+ %fa = fadd float %bc0, %bc0
+ %bc1 = bitcast float %fa to i32
+ store atomic i32 %bc1, ptr %dst seq_cst, align 8
+ ret void
+}
+
+; Test operation on memory involving atomic load and store.
+define void @f39(ptr %ptr) {
+; CHECK-LABEL: f39:
+; CHECK: # %bb.0:
+; CHECK-NEXT: oi 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %val = load atomic i8, ptr %ptr seq_cst, align 1
+ %or = or i8 %val, -255
+ store atomic i8 %or, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+; Some atomic stores of immediates.
+define void @f40(ptr %ptr) {
+; CHECK-LABEL: f40:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvi 0(%r2), 128
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i8 128, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+define void @f41(ptr %ptr) {
+; CHECK-LABEL: f41:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvhi 0(%r2), -1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i32 4294967295, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f42(ptr %ptr) {
+; CHECK-LABEL: f42:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mvhi 0(%r2), -1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i32 4294967295, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f43(ptr %ptr) {
+; CHECK-LABEL: f43:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llihl %r0, 255
+; CHECK-NEXT: oilf %r0, 4294967295
+; CHECK-NEXT: stg %r0, 0(%r2)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic i64 1099511627775, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f44(ptr %ptr) {
+; CHECK-LABEL: f44:
+; CHECK: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI48_0
+; CHECK-NEXT: ld %f0, 0(%r1)
+; CHECK-NEXT: std %f0, 0(%r2)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ store atomic double 0x3ff0000020000000, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+; Vector Store Element.
+define void @f45(<16 x i8> %val, ptr %ptr) {
+; CHECK-LABEL: f45:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteb %v24, 0(%r2), 0
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <16 x i8> %val, i32 0
+ store atomic i8 %element, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+define void @f46(<8 x i16> %val, ptr %base) {
+; CHECK-LABEL: f46:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteh %v24, 4094(%r2), 5
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i16, ptr %base, i32 2047
+ %element = extractelement <8 x i16> %val, i32 5
+ store atomic i16 %element, ptr %ptr seq_cst, align 2
+ ret void
+}
+
+define void @f47(<4 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: f47:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vstef %v24, 0(%r2), 3
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <4 x i32> %val, i32 3
+ store atomic i32 %element, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f48(<2 x i64> %val, ptr %ptr) {
+; CHECK-LABEL: f48:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteg %v24, 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <2 x i64> %val, i32 1
+ store atomic i64 %element, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f49(<4 x float> %val, ptr %ptr) {
+; CHECK-LABEL: f49:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vstef %v24, 0(%r2), 0
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <4 x float> %val, i32 0
+ store atomic float %element, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @f50(<2 x double> %val, ptr %ptr) {
+; CHECK-LABEL: f50:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsteg %v24, 0(%r2), 1
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %element = extractelement <2 x double> %val, i32 1
+ store atomic double %element, ptr %ptr seq_cst, align 8
+ ret void
+}
+
+define void @f51(ptr %src, ptr %dst) {
+; CHECK-LABEL: f51:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpq %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: vgmf %v1, 2, 8
+; CHECK-NEXT: aebr %f0, %f1
+; CHECK-NEXT: ste %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i128, ptr %src seq_cst, align 16
+ %b0 = bitcast i128 %atomic-load to <4 x float>
+ %vecext = extractelement <4 x float> %b0, i64 0
+ %add = fadd float %vecext, 1.000000e+00
+ %b1 = bitcast float %add to i32
+ store atomic i32 %b1, ptr %dst seq_cst, align 4
+ ret void
+}
+
+define void @f52(ptr %src, ptr %dst) {
+; CHECK-LABEL: f52:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpq %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: vgmg %v1, 2, 11
+; CHECK-NEXT: adbr %f0, %f1
+; CHECK-NEXT: std %f0, 0(%r3)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %atomic-load = load atomic i128, ptr %src seq_cst, align 16
+ %b0 = bitcast i128 %atomic-load to <2 x double>
+ %vecext = extractelement <2 x double> %b0, i64 0
+ %add = fadd double %vecext, 1.000000e+00
+ %b1 = bitcast double %add to i64
+ store atomic i64 %b1, ptr %dst seq_cst, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
index b748bfc767a4db..91e324b0af1a97 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll
@@ -6,10 +6,7 @@
define void @f1(ptr %src, float %val) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0s killed $f0s def $f0d
-; CHECK-NEXT: lgdr %r0, %f0
-; CHECK-NEXT: srlg %r0, %r0, 32
-; CHECK-NEXT: st %r0, 0(%r2)
+; CHECK-NEXT: ste %f0, 0(%r2)
; CHECK-NEXT: bcr 15, %r0
; CHECK-NEXT: br %r14
store atomic float %val, ptr %src seq_cst, align 4
From 4d0e6f740720ed5e0d550a0b59af34d2894f562a Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Tue, 9 Jan 2024 13:40:12 -0600
Subject: [PATCH 2/3] Updates after review
---
.../SelectionDAG/LegalizeIntegerTypes.cpp | 13 ++++--
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 7 +---
.../Target/SystemZ/SystemZISelLowering.cpp | 15 ++++---
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 4 +-
llvm/lib/Target/SystemZ/SystemZInstrFP.td | 8 ++--
llvm/lib/Target/SystemZ/SystemZOperators.td | 4 --
llvm/test/CodeGen/SystemZ/atomic-memofolds.ll | 42 +++++++++++++++++--
7 files changed, 63 insertions(+), 30 deletions(-)
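As a sketch of what relaxing storeLoadIsAligned for atomic MMOs enables (mirroring the updated f15 test below), a reg/mem FP operation such as maeb can now fold the atomic load directly:

define float @f15(float %f1, ptr %ptr, float %acc) {
  %f2 = load atomic float, ptr %ptr seq_cst, align 4
  %res = call float @llvm.fma.f32(float %f1, float %f2, float %acc)
  ret float %res
}

With this update the checks expect maeb %f2, %f0, 0(%r2) instead of a separate lde + wfmasb.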
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 7642b6f4160a7b..698ec9c584c1d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -343,12 +343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
if (N->getOpcode() == ISD::ATOMIC_LOAD) {
ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
if (ETy == ISD::NON_EXTLOAD) {
- if (TLI.getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
ETy = ISD::SEXTLOAD;
- else if (TLI.getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ break;
+ case ISD::ZERO_EXTEND:
ETy = ISD::ZEXTLOAD;
- else
+ break;
+ case ISD::ANY_EXTEND:
ETy = ISD::EXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
}
cast<AtomicSDNode>(Res)->setExtensionType(ETy);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index f4e6081ae82104..48b2999096fcd4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1516,13 +1516,10 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");
- // These instructions are not atomic.
- if (MMO->isAtomic())
- return false;
-
// The memory access must have a proper alignment and no index register.
+  // ATOMIC_LOADs do not have an offset operand.
if (MemAccess->getAlign().value() < StoreSize ||
- !MemAccess->getOffset().isUndef())
+ (!MMO->isAtomic() && !MemAccess->getOffset().isUndef()))
return false;
// The MMO must not have an unaligned offset.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 39bd1aea6979c4..c8411ca577f11c 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4515,7 +4515,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}
-SDValue SystemZTargetLowering::lowerATOMIC_I128_LDST(SDValue Op,
+SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
@@ -5645,12 +5645,11 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
-bool SystemZTargetLowering::isVectorElementLoad(SDValue Op, EVT VecVT) const {
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
if (auto *AL = dyn_cast<AtomicSDNode>(Op))
- if (AL->getOpcode() == ISD::ATOMIC_LOAD && SDValue(AL, 0).hasOneUse() &&
- AL->getMemoryVT() == VecVT.getScalarType())
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD)
return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
@@ -5689,13 +5688,13 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single, VT)))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (!isVectorElementLoad(Elem, VT)) {
+ if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@@ -5767,7 +5766,7 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (isVectorElementLoad(Elems[I], VT)) {
+ if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -6129,7 +6128,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
case ISD::ATOMIC_LOAD:
- return lowerATOMIC_I128_LDST(Op, DAG);
+ return lowerATOMIC_LDST_I128(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 9c442268dbb111..e1ea069f30ba86 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -694,7 +694,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_I128_LDST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
@@ -704,7 +704,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isVectorElementLoad(SDValue Op, EVT VecVT) const;
+ bool isVectorElementLoad(SDValue Op) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 8cafa0a936a404..6e67425c1e788b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -495,8 +495,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, nonatomic_ld, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, nonatomic_ld, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
@@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, nonatomic_ld, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, nonatomic_ld, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 28815083daab0c..d98bb886c18506 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -607,10 +607,6 @@ def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
-def nonatomic_ld : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !cast<LoadSDNode>(N)->isAtomic();
-}]>;
-
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
: PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
index 56c1eb2b85a8d5..fa1578df04bec1 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
@@ -170,12 +170,11 @@ define i64 @f14(i64 %a, ptr %src) {
ret i64 %sub
}
-; Check that maeb (reg/mem) is *not* used for an atomic load.
define float @f15(float %f1, ptr %ptr, float %acc) {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
-; CHECK-NEXT: lde %f1, 0(%r2)
-; CHECK-NEXT: wfmasb %f0, %f0, %f1, %f2
+; CHECK-NEXT: maeb %f2, %f0, 0(%r2)
+; CHECK-NEXT: ldr %f0, %f2
; CHECK-NEXT: br %r14
%f2 = load atomic float, ptr %ptr seq_cst, align 4
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
@@ -387,6 +386,39 @@ define void @f25_b(ptr %src, ptr %dst) {
ret void
}
+; Do *not* use vlrep for an extending load.
+define <4 x i32> @f25_c(ptr %ptr) {
+; CHECK-LABEL: f25_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lb %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepf %v24, %v0, 1
+; CHECK-NEXT: br %r14
+ %L = load atomic i8, ptr %ptr seq_cst, align 4
+ %S = sext i8 %L to i32
+ %val = insertelement <4 x i32> undef, i32 %S, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Do *not* use vlrep if there is another scalar use.
+define <4 x i32> @f25_d(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: f25_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: l %r0, 0(%r2)
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepf %v24, %v0, 1
+; CHECK-NEXT: st %r0, 0(%r3)
+; CHECK-NEXT: br %r14
+ %L = load atomic i32, ptr %ptr seq_cst, align 4
+ store i32 %L, ptr %dst, align 4
+ %val = insertelement <4 x i32> undef, i32 %L, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
define void @f26(ptr %src, ptr %dst) {
; CHECK-LABEL: f26:
; CHECK: # %bb.0:
@@ -412,6 +444,8 @@ define void @f26_b(ptr %src, ptr %dst) {
ret void
}
+
+
; Vector Load logical element and zero.
define <16 x i8> @f27(ptr %ptr) {
; CHECK-LABEL: f27:
@@ -607,7 +641,7 @@ define void @f43(ptr %ptr) {
define void @f44(ptr %ptr) {
; CHECK-LABEL: f44:
; CHECK: # %bb.0:
-; CHECK-NEXT: larl %r1, .LCPI48_0
+; CHECK-NEXT: larl %r1, .LCPI50_0
; CHECK-NEXT: ld %f0, 0(%r1)
; CHECK-NEXT: std %f0, 0(%r2)
; CHECK-NEXT: bcr 14, %r0
From 30a96e3e4188bdbf5c4f4e371670d90c5c751c3f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Tue, 6 Feb 2024 12:54:57 +0100
Subject: [PATCH 3/3] Try selecting atomic loads with patfrags again.
---
.../include/llvm/Target/TargetSelectionDAG.td | 2 +-
llvm/lib/Target/PowerPC/PPCInstrInfo.td | 12 +-
llvm/lib/Target/PowerPC/PPCInstrP10.td | 16 +-
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 68 ++---
.../Target/SystemZ/SystemZISelLowering.cpp | 4 +-
llvm/lib/Target/SystemZ/SystemZInstrFP.td | 46 ++--
.../lib/Target/SystemZ/SystemZInstrFormats.td | 4 +-
llvm/lib/Target/SystemZ/SystemZInstrHFP.td | 54 ++--
llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 250 +++++++++---------
llvm/lib/Target/SystemZ/SystemZInstrVector.td | 40 +--
llvm/lib/Target/SystemZ/SystemZOperators.td | 194 ++++++++++----
llvm/lib/Target/SystemZ/SystemZPatterns.td | 4 +-
llvm/lib/Target/VE/VEInstrInfo.td | 8 +-
llvm/test/CodeGen/SystemZ/atomic-load-06.ll | 2 +-
.../CodeGen/SystemZ/atomic-memops-fp128.ll | 31 +++
.../{atomic-memofolds.ll => atomic-memops.ll} | 52 +++-
16 files changed, 477 insertions(+), 310 deletions(-)
create mode 100644 llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll
rename llvm/test/CodeGen/SystemZ/{atomic-memofolds.ll => atomic-memops.ll} (94%)
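A minimal sketch of what dropping SDTCisInt<0> from SDTAtomicLoad is meant to allow (the function name here is made up for illustration): an FP-typed atomic load that the ordinary load PatFrags can match directly, which on SystemZ should now select a plain FP load with an atomic MMO, while targets such as PowerPC keep their atomic load patterns integer-only via the added iAny constraints.

define double @load_atomic_f64(ptr %src) {
  %val = load atomic double, ptr %src seq_cst, align 8
  ret double %val
}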
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 22360353790dbc..0b156ab6c64983 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -318,7 +318,7 @@ def SDTAtomicStore : SDTypeProfile<0, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def SDTAtomicLoad : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
+ SDTCisPtrTy<1>
]>;
class SDCallSeqStart<list<SDTypeConstraint> constraints> :
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 3abd97f2c38c09..70c2fc74818717 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5035,12 +5035,12 @@ defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;
// Atomic loads
-def : Pat<(atomic_load_8 DForm:$src), (LBZ memri:$src)>;
-def : Pat<(atomic_load_16 DForm:$src), (LHZ memri:$src)>;
-def : Pat<(atomic_load_32 DForm:$src), (LWZ memri:$src)>;
-def : Pat<(atomic_load_8 XForm:$src), (LBZX memrr:$src)>;
-def : Pat<(atomic_load_16 XForm:$src), (LHZX memrr:$src)>;
-def : Pat<(atomic_load_32 XForm:$src), (LWZX memrr:$src)>;
+def : Pat<(iAny (atomic_load_8 DForm:$src)), (LBZ memri:$src)>;
+def : Pat<(iAny (atomic_load_16 DForm:$src)), (LHZ memri:$src)>;
+def : Pat<(iAny (atomic_load_32 DForm:$src)), (LWZ memri:$src)>;
+def : Pat<(iAny (atomic_load_8 XForm:$src)), (LBZX memrr:$src)>;
+def : Pat<(iAny (atomic_load_16 XForm:$src)), (LHZX memrr:$src)>;
+def : Pat<(iAny (atomic_load_32 XForm:$src)), (LWZX memrr:$src)>;
// Atomic stores
def : Pat<(atomic_store_8 i32:$val, DForm:$ptr), (STB gprc:$val, memri:$ptr)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index d5a372e4dc1010..41068f86935f8a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -1289,13 +1289,13 @@ let Predicates = [PCRelativeMemops] in {
(PSTXVpc $XS, $ga, 0)>;
// Atomic Load
- def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
+ def : Pat<(iAny (atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
+ def : Pat<(iAny (atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
- def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
+ def : Pat<(iAny (atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZpc $ga, 0)>;
- def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
+ def : Pat<(iAny (atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga))),
(PLDpc $ga, 0)>;
// Atomic Store
@@ -2347,10 +2347,10 @@ let Predicates = [PrefixInstrs] in {
def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>;
// Atomic Load
- def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>;
- def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>;
- def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>;
- def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>;
+ def : Pat<(iAny (atomic_load_8 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(iAny (atomic_load_16 PDForm:$src)), (PLHZ memri34:$src)>;
+ def : Pat<(iAny (atomic_load_32 PDForm:$src)), (PLWZ memri34:$src)>;
+ def : Pat<(iAny (atomic_load_64 PDForm:$src)), (PLD memri34:$src)>;
// Atomic Store
def : Pat<(atomic_store_8 i32:$RS, PDForm:$dst), (PSTB $RS, memri34:$dst)>;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 48b2999096fcd4..cc1cbb23ccde4b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -344,12 +344,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// requirements for a PC-relative access.
bool storeLoadIsAligned(SDNode *N) const;
+ // Return the load extension type of a load or atomic load.
+ ISD::LoadExtType getLoadExtType(SDNode *N) const;
+
// Try to expand a boolean SELECT_CCMASK using an IPM sequence.
SDValue expandSelectBoolean(SDNode *Node);
- // Convert ATOMIC_LOADs to LOADs to facilitate instruction selection.
- void convertATOMIC_LOADs(SDNode *Node, unsigned Depth = 0);
-
public:
static char ID;
@@ -1510,16 +1510,17 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
- auto *MemAccess = cast<LSBaseSDNode>(N);
+ auto *MemAccess = cast<MemSDNode>(N);
+ auto *LdSt = dyn_cast<LSBaseSDNode>(MemAccess);
TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();
SDValue BasePtr = MemAccess->getBasePtr();
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");
// The memory access must have a proper alignment and no index register.
- // ATOMIC_LOADs do not have the offset operand.
+ // Only load and store nodes have the offset operand (atomic loads do not).
if (MemAccess->getAlign().value() < StoreSize ||
- (!MMO->isAtomic() && !MemAccess->getOffset().isUndef()))
+ (LdSt && !LdSt->getOffset().isUndef()))
return false;
// The MMO must not have an unaligned offset.
@@ -1549,35 +1550,15 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
return true;
}
-// This is a hack to convert ATOMIC_LOADs to LOADs in the last minute just
-// before instruction selection begins. It would have been easier if
-// ATOMIC_LOAD nodes would instead always be built by SelectionDAGBuilder as
-// LOADs with an atomic MMO and properly handled as such in DAGCombiner, but
-// until that changes they need to remain as ATOMIC_LOADs until all
-// DAGCombining is done. Convert Node or any of its operands from
-// ATOMIC_LOAD to LOAD.
-void SystemZDAGToDAGISel::convertATOMIC_LOADs(SDNode *Node, unsigned Depth) {
- if (Depth > 1) // Chain operands are also followed so this seems enough.
- return;
- if (Node->getOpcode() == ISD::ATOMIC_LOAD) {
- auto *ALoad = cast<AtomicSDNode>(Node);
- // It seems necessary to morph the node as it is not yet being selected.
- LoadSDNode *Ld = cast<LoadSDNode>(CurDAG->MorphNodeTo(
- ALoad, ISD::LOAD, CurDAG->getVTList(ALoad->getValueType(0), MVT::Other),
- {ALoad->getChain(), ALoad->getBasePtr()}));
- // Sanity check the morph. The extension type for an extending load
- // should have been set prior to instruction selection and remain in the
- // morphed node.
- assert(((SDNode *)Ld) == ((SDNode *)ALoad) && "Bad CSE on atomic load.");
- assert(Ld->getMemOperand()->isAtomic() && "Broken MMO.");
- ISD::LoadExtType ETy = Ld->getExtensionType();
- bool IsNonExt = Ld->getMemoryVT().getSizeInBits() ==
- Ld->getValueType(0).getSizeInBits();
- assert(IsNonExt == (ETy == ISD::NON_EXTLOAD) && "Bad extension type.");
- return;
- }
- for (SDValue Op : Node->ops())
- convertATOMIC_LOADs(Op.getNode(), ++Depth);
+ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const {
+ ISD::LoadExtType ETy;
+ if (auto *L = dyn_cast<LoadSDNode>(N))
+ ETy = L->getExtensionType();
+ else if (auto *AL = dyn_cast<AtomicSDNode>(N))
+ ETy = AL->getExtensionType();
+ else
+ llvm_unreachable("Unknown load node type.");
+ return ETy;
}
void SystemZDAGToDAGISel::Select(SDNode *Node) {
@@ -1588,9 +1569,6 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
return;
}
- // Prepare any ATOMIC_LOAD to be selected as a LOAD with an atomic MMO.
- convertATOMIC_LOADs(Node);
-
unsigned Opcode = Node->getOpcode();
switch (Opcode) {
case ISD::OR:
@@ -1783,7 +1761,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
case ISD::ATOMIC_STORE: {
auto *AtomOp = cast<AtomicSDNode>(Node);
- // Store FP values directly without first moving to a GPR.
+ // Store FP values directly without first moving to a GPR. This is needed
+ // as long as clang always emits the cast to integer.
EVT SVT = AtomOp->getMemoryVT();
SDValue StoredVal = AtomOp->getVal();
if (SVT.isInteger() && StoredVal->getOpcode() == ISD::BITCAST &&
@@ -1791,6 +1770,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
StoredVal = StoredVal->getOperand(0);
SVT = StoredVal.getValueType();
}
+ // Replace the atomic_store with a regular store and select it. This is
+ // ok since we know all store instructions <= 8 bytes are atomic, and the
+ // 16 byte case is already handled during lowering.
StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(
AtomOp->getChain(), SDLoc(AtomOp), StoredVal, AtomOp->getBasePtr(), SVT,
AtomOp->getMemOperand()));
@@ -1807,6 +1789,14 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
}
+#ifndef NDEBUG
+ if (auto *AL = dyn_cast<AtomicSDNode>(Node))
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD)
+ assert((AL->getExtensionType() == ISD::NON_EXTLOAD ||
+ AL->getMemoryVT().isScalarInteger()) &&
+ "Not expecting extending fp atomic_load nodes.");
+#endif
+
SelectCode(Node);
}
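
[Editorial note, not part of the patch] To illustrate the ATOMIC_STORE handling above: clang lowers an atomic FP store by first bitcasting the value to an integer, and the Select() change strips that cast so the store can be selected directly as an FP store. A minimal IR sketch (function name hypothetical, expected output an untested guess):

define void @store_atomic_double(ptr %dst, double %val) {
  %cast = bitcast double %val to i64
  store atomic i64 %cast, ptr %dst seq_cst, align 8
  ret void
}
; expected to select something like:  std %f0, 0(%r2)  followed by  bcr 14, %r0
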
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c8411ca577f11c..75fd61461ecb15 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -916,6 +916,7 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
return false;
}
+// FIXME: Clang always emits these casts regardless of these hooks.
TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
// Lower fp128 the same way as i128.
@@ -6604,7 +6605,8 @@ SDValue SystemZTargetLowering::combineBITCAST(SDNode *N,
EVT ResVT = N->getValueType(0);
// Handle atomic loads to load float/double values directly and not via a
// GPR. Do it before legalization to help in treating the ATOMIC_LOAD the
- // same way as a LOAD, and e.g. emit a REPLICATE.
+ // same way as a LOAD, and e.g. emit a REPLICATE. FIXME: This is only
+ // needed because clang currently always emits these casts.
if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && InVT.getSizeInBits() <= 64 &&
ALoad->getExtensionType() == ISD::NON_EXTLOAD &&
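
[Editorial note, not part of the patch] Conversely, for loads the combineBITCAST change above lets an integer atomic load whose result is only used via a bitcast be treated as an FP ATOMIC_LOAD, so it is selected without a GPR round trip. A minimal IR sketch (function name hypothetical, expected output an untested guess):

define double @load_atomic_double(ptr %src) {
  %i = load atomic i64, ptr %src seq_cst, align 8
  %f = bitcast i64 %i to double
  ret double %f
}
; expected to select a plain:  ld %f0, 0(%r2)
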
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 6e67425c1e788b..f4b5aeaebef923 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -129,8 +129,8 @@ defm LoadStoreF128 : MVCLoadStore<load, f128, MVCImm, 15>;
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
- defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
- defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+ defm LE : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP32, 4>;
+ defm LD : UnaryRXPair<"ld", 0x68, 0xED65, z_load, FP64, 8>;
// For z13 we prefer LDE over LE to avoid partial register dependencies.
let isCodeGenOnly = 1 in
@@ -200,14 +200,14 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
// Extend memory floating-point values to wider representations.
let Uses = [FPC], mayRaiseFPException = 1 in {
- def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>;
+ def LDEB : UnaryRXE<"ldeb", 0xED04, z_any_extloadf32, FP64, 4>;
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)),
+ def : Pat<(f128 (z_any_extloadf32 bdxaddr12only:$src)),
(LXEB bdxaddr12only:$src)>;
- def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)),
+ def : Pat<(f128 (z_any_extloadf64 bdxaddr12only:$src)),
(LXDB bdxaddr12only:$src)>;
}
@@ -430,8 +430,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
- defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>;
+ defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, z_load, 4>;
+ defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, z_load, 8>;
}
// Subtraction.
@@ -441,8 +441,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
- defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>;
- defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
+ defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, z_load, 4>;
+ defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, z_load, 8>;
}
// Multiplication.
@@ -452,8 +452,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
- defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>;
- defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
+ defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, z_load, 4>;
+ defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, z_load, 8>;
}
// f64 multiplication of two FP32 registers.
@@ -466,7 +466,7 @@ def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
// f64 multiplication of an FP32 register and an f32 memory.
let Uses = [FPC], mayRaiseFPException = 1 in
- def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+ def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, z_load, 4>;
def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
(f64 (any_extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
@@ -483,7 +483,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
// f128 multiplication of an FP64 register and an f64 memory.
let Uses = [FPC], mayRaiseFPException = 1 in
- def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
+ def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, z_load, 8>;
let Predicates = [FeatureNoVectorEnhancements1] in
def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
(f128 (any_extloadf64 bdxaddr12only:$addr))),
@@ -495,8 +495,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, z_load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, z_load, 8>;
}
// Fused multiply-subtract.
@@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, z_load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, z_load, 8>;
}
// Division.
@@ -514,8 +514,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
- defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
- defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+ defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, z_load, 4>;
+ defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, z_load, 8>;
}
// Divide to integer.
@@ -533,15 +533,15 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>;
def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>;
- def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, z_load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, z_load, 8>;
def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>;
def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>;
def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>;
- def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>;
- def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>;
+ def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, z_load, 4>;
+ def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, z_load, 8>;
}
// Test Data Class.
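
[Editorial note, not part of the patch] The z_load substitutions in SystemZInstrFP.td above allow a non-extending atomic load to be folded into the reg/mem FP instructions, just like the maeb case in test f15. A minimal IR sketch (function name hypothetical, expected output an untested guess, assuming the adb pattern fires):

define double @fadd_atomic(double %acc, ptr %src) {
  %l = load atomic double, ptr %src seq_cst, align 8
  %sum = fadd double %acc, %l
  ret double %sum
}
; expected to fold the load:  adb %f0, 0(%r2)
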
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index bb9fa0fc33ffa0..3dba33b66bf4f4 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -3777,7 +3777,7 @@ class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr12only>
: InstSI<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
+ [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
let mayStore = 1;
}
@@ -3786,7 +3786,7 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr20only>
: InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
+ [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
let mayStore = 1;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrHFP.td b/llvm/lib/Target/SystemZ/SystemZInstrHFP.td
index 2e3c9932d62142..d2e05b63c6c630 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrHFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -134,8 +134,8 @@ let Defs = [CC] in {
def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>;
def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>;
}
- def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>;
- def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>;
+ def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, z_load, 4>;
+ def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, z_load, 8>;
}
// Addition (unnormalized).
@@ -144,8 +144,8 @@ let Defs = [CC] in {
def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>;
def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>;
}
- def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>;
- def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>;
+ def AU : BinaryRX<"au", 0x7E, null_frag, FP32, z_load, 4>;
+ def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, z_load, 8>;
}
// Subtraction.
@@ -154,8 +154,8 @@ let Defs = [CC] in {
def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>;
def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>;
- def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>;
- def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>;
+ def SE : BinaryRX<"se", 0x7B, null_frag, FP32, z_load, 4>;
+ def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, z_load, 8>;
}
// Subtraction (unnormalized).
@@ -163,8 +163,8 @@ let Defs = [CC] in {
def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>;
def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>;
- def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>;
- def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>;
+ def SU : BinaryRX<"su", 0x7F, null_frag, FP32, z_load, 4>;
+ def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, z_load, 8>;
}
// Multiplication.
@@ -173,55 +173,55 @@ let isCommutable = 1 in {
def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>;
def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>;
}
-def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>;
-def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>;
+def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, z_load, 4>;
+def MD : BinaryRX <"md", 0x6C, null_frag, FP64, z_load, 8>;
// Extending multiplication (f32 x f32 -> f64).
def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>;
-def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>;
+def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, z_load, 4>;
let isAsmParserOnly = 1 in {
def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>;
- def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>;
+ def ME : BinaryRX<"me", 0x7C, null_frag, FP64, z_load, 4>;
}
// Extending multiplication (f64 x f64 -> f128).
def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>;
-def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>;
+def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, z_load, 8>;
// Fused multiply-add.
def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>;
def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>;
-def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>;
-def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>;
+def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, z_load, 4>;
+def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, z_load, 8>;
// Fused multiply-subtract.
def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>;
def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>;
-def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>;
-def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>;
+def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, z_load, 4>;
+def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, z_load, 8>;
// Multiplication (unnormalized).
def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>;
def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>;
def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>;
-def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>;
-def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>;
-def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>;
+def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, z_load, 8>;
+def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, z_load, 8>;
+def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, z_load, 8>;
// Fused multiply-add (unnormalized).
def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>;
def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>;
def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>;
-def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>;
-def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>;
-def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>;
+def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, z_load, 8>;
+def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, z_load, 8>;
+def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, z_load, 8>;
// Division.
def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>;
def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>;
def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>;
-def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>;
-def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>;
+def DE : BinaryRX <"de", 0x7D, null_frag, FP32, z_load, 4>;
+def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, z_load, 8>;
//===----------------------------------------------------------------------===//
@@ -233,7 +233,7 @@ let Defs = [CC] in {
def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>;
def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>;
- def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>;
- def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>;
+ def CE : CompareRX<"ce", 0x79, null_frag, FP32, z_load, 4>;
+ def CD : CompareRX<"cd", 0x69, null_frag, FP64, z_load, 8>;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 937e36057a6ede..96ea65b6c3d881 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -306,7 +306,7 @@ let Predicates = [IsTargetXPLINK64] in {
let mayLoad = 1, AddedComplexity = 20, hasNoSchedulingInfo = 1, Defs = [CC] in {
def ADA_ENTRY_VALUE : Alias<12, (outs GR64:$Reg), (ins adasym:$addr,
ADDR64:$ADA, imm64:$Offset),
- [(set i64:$Reg, (load (z_ada_entry
+ [(set i64:$Reg, (z_load (z_ada_entry
iPTR:$addr, iPTR:$ADA, i64:$Offset)))]>;
}
}
@@ -468,12 +468,12 @@ let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
// Expands to L, LY or LFH, depending on the choice of register.
- def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+ def LMux : UnaryRXYPseudo<"l", z_load, GRX32, 4>,
Requires<[FeatureHighWord]>;
- defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
- def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+ defm L : UnaryRXPair<"l", 0x58, 0xE358, z_load, GR32, 4>;
+ def LFH : UnaryRXY<"lfh", 0xE3CA, z_load, GRH32, 4>,
Requires<[FeatureHighWord]>;
- def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
+ def LG : UnaryRXY<"lg", 0xE304, z_load, GR64, 8>;
// These instructions are split after register allocation, so we don't
// want a custom inserter.
@@ -483,22 +483,22 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in {
}
}
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>;
- def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>;
+ def LT : UnaryRXY<"lt", 0xE312, z_load, GR32, 4>;
+ def LTG : UnaryRXY<"ltg", 0xE302, z_load, GR64, 8>;
}
let canFoldAsLoad = 1 in {
- def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
- def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+ def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_z_load, GR32>;
+ def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_z_load, GR64>;
}
// Load and zero rightmost byte.
let Predicates = [FeatureLoadAndZeroRightmostByte] in {
def LZRF : UnaryRXY<"lzrf", 0xE33B, null_frag, GR32, 4>;
def LZRG : UnaryRXY<"lzrg", 0xE32A, null_frag, GR64, 8>;
- def : Pat<(and (i32 (load bdxaddr20only:$src)), 0xffffff00),
+ def : Pat<(and (i32 (z_load bdxaddr20only:$src)), 0xffffff00),
(LZRF bdxaddr20only:$src)>;
- def : Pat<(and (i64 (load bdxaddr20only:$src)), 0xffffffffffffff00),
+ def : Pat<(and (i64 (z_load bdxaddr20only:$src)), 0xffffffffffffff00),
(LZRG bdxaddr20only:$src)>;
}
@@ -689,29 +689,29 @@ def : Pat<(sext_inreg GR64:$src, i32),
// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH,
// depending on the choice of register.
-def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>,
+def LBMux : UnaryRXYPseudo<"lb", z_asextloadi8, GRX32, 1>,
Requires<[FeatureHighWord]>;
-def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>;
-def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>,
+def LB : UnaryRXY<"lb", 0xE376, z_asextloadi8, GR32, 1>;
+def LBH : UnaryRXY<"lbh", 0xE3C0, z_asextloadi8, GRH32, 1>,
Requires<[FeatureHighWord]>;
// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH,
// depending on the choice of register.
-def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>,
+def LHMux : UnaryRXYPseudo<"lh", z_asextloadi16, GRX32, 2>,
Requires<[FeatureHighWord]>;
-defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>;
-def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>,
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, z_asextloadi16, GR32, 2>;
+def LHH : UnaryRXY<"lhh", 0xE3C4, z_asextloadi16, GRH32, 2>,
Requires<[FeatureHighWord]>;
-def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_z_asextloadi16, GR32>;
// 64-bit extensions from memory.
-def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>;
-def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>;
-def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>;
-def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>;
-def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>;
+def LGB : UnaryRXY<"lgb", 0xE377, z_asextloadi8, GR64, 1>;
+def LGH : UnaryRXY<"lgh", 0xE315, z_asextloadi16, GR64, 2>;
+def LGF : UnaryRXY<"lgf", 0xE314, z_asextloadi32, GR64, 4>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_z_asextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_z_asextloadi32, GR64>;
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>;
+ def LTGF : UnaryRXY<"ltgf", 0xE332, z_asextloadi32, GR64, 4>;
//===----------------------------------------------------------------------===//
// Zero extensions
@@ -740,40 +740,40 @@ def : Pat<(and GR64:$src, 0xffffffff),
// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH,
// depending on the choice of register.
-def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
+def LLCMux : UnaryRXYPseudo<"llc", z_azextloadi8, GRX32, 1>,
Requires<[FeatureHighWord]>;
-def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
-def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
+def LLC : UnaryRXY<"llc", 0xE394, z_azextloadi8, GR32, 1>;
+def LLCH : UnaryRXY<"llch", 0xE3C2, z_azextloadi8, GRH32, 1>,
Requires<[FeatureHighWord]>;
// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
// depending on the choice of register.
-def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
+def LLHMux : UnaryRXYPseudo<"llh", z_azextloadi16, GRX32, 2>,
Requires<[FeatureHighWord]>;
-def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
-def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
+def LLH : UnaryRXY<"llh", 0xE395, z_azextloadi16, GR32, 2>;
+def LLHH : UnaryRXY<"llhh", 0xE3C6, z_azextloadi16, GRH32, 2>,
Requires<[FeatureHighWord]>;
-def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_z_azextloadi16, GR32>;
// 64-bit extensions from memory.
-def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>;
-def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>;
-def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
-def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
-def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
+def LLGC : UnaryRXY<"llgc", 0xE390, z_azextloadi8, GR64, 1>;
+def LLGH : UnaryRXY<"llgh", 0xE391, z_azextloadi16, GR64, 2>;
+def LLGF : UnaryRXY<"llgf", 0xE316, z_azextloadi32, GR64, 4>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_z_azextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_z_azextloadi32, GR64>;
// 31-to-64-bit zero extensions.
def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>;
def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>;
def : Pat<(and GR64:$src, 0x7fffffff),
(LLGTR GR64:$src)>;
-def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0x7fffffff),
+def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0x7fffffff),
(LLGT bdxaddr20only:$src)>;
// Load and zero rightmost byte.
let Predicates = [FeatureLoadAndZeroRightmostByte] in {
def LLZRGF : UnaryRXY<"llzrgf", 0xE33A, null_frag, GR64, 4>;
- def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0xffffff00),
+ def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0xffffff00),
(LLZRGF bdxaddr20only:$src)>;
}
@@ -930,14 +930,14 @@ defm : SXU<ineg, LCGFR>;
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1 in
- defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>;
-defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>;
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, z_azextloadi8, 1>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, z_azextloadi8, 1>;
-defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC32, GR32, z_azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, z_azextloadi8, bdxaddr20pair>;
-defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC, GR64, z_azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, z_azextloadi8, bdxaddr20pair>;
// Insert characters under mask -- not (yet) used for codegen.
let Defs = [CC] in {
@@ -1015,12 +1015,12 @@ let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in {
def AGFI : BinaryRIL<"agfi", 0xC28, z_sadd, GR64, imm64sx32>;
// Addition of memory.
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
- defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
- def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, z_asextloadi16, 2>;
+ defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, z_load, 4>;
+ def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, z_asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
- def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
- defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>;
+ def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, z_asextloadi32, 4>;
+ defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, z_load, 8>;
// Addition to memory.
def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
@@ -1058,9 +1058,9 @@ let Defs = [CC], CCValues = 0xF, IsLogical = 1 in {
Requires<[FeatureHighWord]>;
// Addition of memory.
- defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
- def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
- defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>;
+ defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, z_load, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, z_azextloadi32, 4>;
+ defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, z_load, 8>;
// Addition to memory.
def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
@@ -1075,8 +1075,8 @@ let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>;
// Addition of memory.
- def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, load, 4>;
- def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, load, 8>;
+ def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, z_load, 4>;
+ def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, z_load, 8>;
}
// Addition that does not modify the condition code.
@@ -1103,12 +1103,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8,
Requires<[FeatureHighWord]>;
// Subtraction of memory.
- defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
- defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
- def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, z_asextloadi16, 2>;
+ defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, z_load, 4>;
+ def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, z_asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
- def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
- defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>;
+ def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, z_asextloadi32, 4>;
+ defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, z_load, 8>;
}
defm : SXB<z_ssub, GR64, SGFR>;
@@ -1156,9 +1156,9 @@ let Defs = [CC], CCValues = 0x7, IsLogical = 1 in {
def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
// Subtraction of memory.
- defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
- def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
- defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>;
+ defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, z_load, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, z_azextloadi32, 4>;
+ defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, z_load, 8>;
}
defm : ZXB<z_usub, GR64, SLGFR>;
@@ -1183,8 +1183,8 @@ let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in {
def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>;
// Subtraction of memory.
- def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, load, 4>;
- def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, load, 8>;
+ def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, z_load, 4>;
+ def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, z_load, 8>;
}
@@ -1233,8 +1233,8 @@ let Defs = [CC] in {
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
- defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
+ defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, z_load, 4>;
+ defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, z_load, 8>;
}
// AND to memory
@@ -1290,8 +1290,8 @@ let Defs = [CC] in {
// ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
- defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
+ defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, z_load, 4>;
+ defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, z_load, 8>;
}
// OR to memory
@@ -1330,8 +1330,8 @@ let Defs = [CC] in {
// XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
- defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
+ defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, z_load, 4>;
+ defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, z_load, 8>;
}
// XOR to memory
@@ -1411,17 +1411,17 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
// Multiplication of memory.
-defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
-defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
-def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, asextloadi16, 2>,
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, z_asextloadi16, 2>;
+defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, z_load, 4>;
+def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, z_asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
-def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
-def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, z_asextloadi32, 4>;
+def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, z_load, 8>;
// Multiplication of memory, setting the condition code.
let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in {
- defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, load, 4>;
- defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>;
+ defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, z_load, 4>;
+ defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, z_load, 8>;
}
// Multiplication of a register, producing two results.
@@ -1437,16 +1437,16 @@ def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2),
(MLGR (AEXT128 GR64:$src1), GR64:$src2)>;
// Multiplication of memory, producing two results.
-def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>;
-def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>;
-def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>,
+def M : BinaryRX <"m", 0x5C, null_frag, GR128, z_load, 4>;
+def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, z_load, 4>;
+def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, z_load, 8>,
Requires<[FeatureMiscellaneousExtensions2]>;
-def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
-def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>;
+def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, z_load, 4>;
+def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, z_load, 8>;
-def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+def : Pat<(z_smul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))),
(MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
-def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+def : Pat<(z_umul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))),
(MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
//===----------------------------------------------------------------------===//
@@ -1462,30 +1462,30 @@ let hasSideEffects = 1 in { // Do not speculatively execute.
def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>;
// Division and remainder, from memory.
- def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>;
- def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>;
- def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>;
- def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>;
- def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>;
+ def D : BinaryRX <"d", 0x5D, null_frag, GR128, z_load, 4>;
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, z_load, 4>;
+ def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, z_load, 8>;
+ def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, z_load, 4>;
+ def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, z_load, 8>;
}
def : Pat<(z_sdivrem GR64:$src1, GR32:$src2),
(DSGFR (AEXT128 GR64:$src1), GR32:$src2)>;
-def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))),
+def : Pat<(z_sdivrem GR64:$src1, (i32 (z_load bdxaddr20only:$src2))),
(DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_sdivrem GR64:$src1, GR64:$src2),
(DSGR (AEXT128 GR64:$src1), GR64:$src2)>;
-def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+def : Pat<(z_sdivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))),
(DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_udivrem GR32:$src1, GR32:$src2),
(DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
subreg_l32)), GR32:$src2)>;
-def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))),
+def : Pat<(z_udivrem GR32:$src1, (i32 (z_load bdxaddr20only:$src2))),
(DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1,
subreg_l32)), bdxaddr20only:$src2)>;
def : Pat<(z_udivrem GR64:$src1, GR64:$src2),
(DLGR (ZEXT128 GR64:$src1), GR64:$src2)>;
-def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+def : Pat<(z_udivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))),
(DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>;
//===----------------------------------------------------------------------===//
@@ -1591,25 +1591,25 @@ let Defs = [CC], CCValues = 0xE in {
def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>;
// Comparison with memory.
- defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>;
- def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>,
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, z_asextloadi16, 2>;
+ def CMux : CompareRXYPseudo<z_scmp, GRX32, z_load, 4>,
Requires<[FeatureHighWord]>;
- defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>;
- def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>,
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, z_load, 4>;
+ def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, z_load, 4>,
Requires<[FeatureHighWord]>;
- def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>;
- def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>;
- def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>;
- def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>;
- def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>;
- def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>;
- def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>;
- def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, z_asextloadi16, 2>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, z_asextloadi32, 4>;
+ def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, z_load, 8>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_z_asextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_z_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_z_asextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_z_asextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_z_load>;
// Comparison between memory and a signed 16-bit immediate.
- def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>;
- def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>;
- def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>;
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, z_asextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, z_load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, z_load, imm64sx16>;
}
defm : SXB<z_scmp, GR64, CGFR>;
@@ -1636,31 +1636,31 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
// Comparison with memory.
- def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>,
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, z_load, 4>,
Requires<[FeatureHighWord]>;
- defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>;
- def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>,
+ defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, z_load, 4>;
+ def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, z_load, 4>,
Requires<[FeatureHighWord]>;
- def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>;
- def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, z_azextloadi32, 4>;
+ def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, z_load, 8>;
def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
- aligned_azextloadi16>;
+ aligned_z_azextloadi16>;
def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
- aligned_load>;
+ aligned_z_load>;
def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
- aligned_azextloadi16>;
+ aligned_z_azextloadi16>;
def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
- aligned_azextloadi32>;
+ aligned_z_azextloadi32>;
def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
- aligned_load>;
+ aligned_z_load>;
// Comparison between memory and an unsigned 8-bit immediate.
- defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>;
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, z_azextloadi8, imm32zx8>;
// Comparison between memory and an unsigned 16-bit immediate.
- def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>;
- def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
- def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, z_azextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, z_load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, z_load, imm64zx16>;
}
defm : ZXB<z_ucmp, GR64, CLGFR>;
@@ -1693,7 +1693,7 @@ let Defs = [CC] in {
def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>;
def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>;
- defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
+ defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, z_anyextloadi8, imm32zx8>;
}
def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>;
@@ -1914,8 +1914,8 @@ let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in {
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, load, 4>;
-def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, load, 8>;
+defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, z_load, 4>;
+def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, z_load, 8>;
defm CVD : StoreRXPair<"cvd", 0x4E, 0xE326, null_frag, GR32, 4>;
def CVDG : StoreRXY<"cvdg", 0xE32E, null_frag, GR64, 8>;
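
[Editorial note, not part of the patch] The z_asextload*/z_azextload* fragments used above are what let an extending use of an atomic load be selected as a single extending-load instruction, as in the lb case of test f25_c. A minimal IR sketch (function name hypothetical, expected output an untested guess):

define i64 @sext_atomic_i16(ptr %src) {
  %h = load atomic i16, ptr %src seq_cst, align 2
  %e = sext i16 %h to i64
  ret i64 %e
}
; intended to match z_asextloadi16 and select:  lgh %r2, 0(%r2)
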
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 799b27d74414d5..245e3c3399a986 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -140,8 +140,8 @@ let Predicates = [FeatureVector] in {
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
let mayLoad = 1 in {
- def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
- def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+ def VL32 : UnaryAliasVRX<z_load, v32sb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<z_load, v64db, bdxaddr12pair>;
}
// Load logical element and zero.
@@ -198,12 +198,12 @@ multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
(scalartype (load bdxaddr12only:$addr)))),
(vlrep bdxaddr12only:$addr)>;
}
-defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
-defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
-defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
-defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
-defm : ReplicatePeephole<VLREPF, v4f32, load, f32>;
-defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
+defm : ReplicatePeephole<VLREPB, v16i8, z_anyextloadi8, i32>;
+defm : ReplicatePeephole<VLREPH, v8i16, z_anyextloadi16, i32>;
+defm : ReplicatePeephole<VLREPF, v4i32, z_load, i32>;
+defm : ReplicatePeephole<VLREPG, v2i64, z_load, i64>;
+defm : ReplicatePeephole<VLREPF, v4f32, z_load, f32>;
+defm : ReplicatePeephole<VLREPG, v2f64, z_load, f64>;
//===----------------------------------------------------------------------===//
// Stores
@@ -1561,13 +1561,13 @@ let Predicates = [FeatureVector] in {
// Any-extending loads into i128.
let Predicates = [FeatureVector] in {
- def : Pat<(i128 (extloadi8 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_extloadi8 bdxaddr12only:$addr)),
(VLREPB bdxaddr12only:$addr)>;
- def : Pat<(i128 (extloadi16 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_extloadi16 bdxaddr12only:$addr)),
(VLREPH bdxaddr12only:$addr)>;
- def : Pat<(i128 (extloadi32 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_extloadi32 bdxaddr12only:$addr)),
(VLREPF bdxaddr12only:$addr)>;
- def : Pat<(i128 (extloadi64 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_extloadi64 bdxaddr12only:$addr)),
(VLREPG bdxaddr12only:$addr)>;
}
@@ -1621,13 +1621,13 @@ let Predicates = [FeatureVector] in {
// Zero-extending loads into i128.
let Predicates = [FeatureVector] in {
- def : Pat<(i128 (zextloadi8 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_zextloadi8 bdxaddr12only:$addr)),
(VLEB (VGBM 0), bdxaddr12only:$addr, 15)>;
- def : Pat<(i128 (zextloadi16 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_zextloadi16 bdxaddr12only:$addr)),
(VLEH (VGBM 0), bdxaddr12only:$addr, 7)>;
- def : Pat<(i128 (zextloadi32 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_zextloadi32 bdxaddr12only:$addr)),
(VLEF (VGBM 0), bdxaddr12only:$addr, 3)>;
- def : Pat<(i128 (zextloadi64 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_zextloadi64 bdxaddr12only:$addr)),
(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
}
@@ -1663,13 +1663,13 @@ let Predicates = [FeatureVector] in {
// Sign-extending loads into i128.
let Predicates = [FeatureVector] in {
- def : Pat<(i128 (sextloadi8 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_sextloadi8 bdxaddr12only:$addr)),
(VSRAB (VLREPB bdxaddr12only:$addr), (VREPIB 120))>;
- def : Pat<(i128 (sextloadi16 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_sextloadi16 bdxaddr12only:$addr)),
(VSRAB (VLREPH bdxaddr12only:$addr), (VREPIB 112))>;
- def : Pat<(i128 (sextloadi32 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_sextloadi32 bdxaddr12only:$addr)),
(VSRAB (VLREPF bdxaddr12only:$addr), (VREPIB 96))>;
- def : Pat<(i128 (sextloadi64 bdxaddr12only:$addr)),
+ def : Pat<(i128 (z_sextloadi64 bdxaddr12only:$addr)),
(VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>;
}
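
[Editorial note, not part of the patch] The SystemZOperators.td hunks below define the PatFrags used throughout: z_load accepts both regular loads and non-extending atomic loads, and the extending variants key off the atomic load's extension type. Together with the ReplicatePeephole change above, that should cover the positive counterpart of tests f25_c/f25_d, sketched here (function name hypothetical, expected output an untested guess):

define <4 x i32> @splat_atomic_i32(ptr %src) {
  %l = load atomic i32, ptr %src seq_cst, align 4
  %v = insertelement <4 x i32> undef, i32 %l, i32 0
  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %s
}
; intended to select a single:  vlrepf %v24, 0(%r2)
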
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index d98bb886c18506..fa292b1b01cd1e 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -534,37 +534,108 @@ def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
-// Extending loads in which the extension type can be signed.
-def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
- unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
- return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
+// Match a load or a non-extending atomic load.
+def z_load : PatFrags<(ops node:$ptr),
+ [(load node:$ptr),
+ (atomic_load node:$ptr)], [{
+ if (auto *AL = dyn_cast<AtomicSDNode>(N)) // FIXME: use getLoadExtType()?
+ if (AL->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+ return true;
}]>;
-def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+
+// Sign extending (atomic) loads.
+def z_sextload : PatFrags<(ops node:$ptr),
+ [(unindexedload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ return getLoadExtType(N) == ISD::SEXTLOAD;
}]>;
-def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+def z_sextloadi8 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
-def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+def z_sextloadi16 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_sextloadi32 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_sextloadi64 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
-// Extending loads in which the extension type can be unsigned.
-def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
- unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
- return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+// Zero extending (atomic) loads.
+def z_zextload : PatFrags<(ops node:$ptr),
+ [(unindexedload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ return getLoadExtType(N) == ISD::ZEXTLOAD;
}]>;
-def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+def z_zextloadi8 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
-def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+def z_zextloadi16 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
-def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+def z_zextloadi32 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_zextloadi64 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending (atomic) loads in which the extension type can be signed.
+def z_asextload : PatFrags<(ops node:$ptr),
+ [(unindexedload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ ISD::LoadExtType ETy = getLoadExtType(N);
+ return ETy == ISD::EXTLOAD || ETy == ISD::SEXTLOAD;
+}]>;
+def z_asextloadi8 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_asextloadi16 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_asextloadi32 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
-// Extending loads in which the extension type doesn't matter.
+// Extending (atomic) loads in which the extension type can be unsigned.
+def z_azextload : PatFrags<(ops node:$ptr),
+ [(unindexedload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ ISD::LoadExtType ETy = getLoadExtType(N);
+ return ETy == ISD::EXTLOAD || ETy == ISD::ZEXTLOAD;
+}]>;
+def z_azextloadi8 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_azextloadi16 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_azextloadi32 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending (atomic) loads in which the extension type doesn't matter.
+def z_anyextload : PatFrags<(ops node:$ptr),
+ [(unindexedload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ return getLoadExtType(N) != ISD::NON_EXTLOAD;
+}]>;
+def z_anyextloadi8 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_anyextloadi16 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_anyextloadi32 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_anyextloadi64 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending non-atomic loads in which the extension type doesn't matter.
def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
}]>;
@@ -578,15 +649,42 @@ def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
+// Extending (atomic) loads that are not sign/zero extending.
+def z_extload : PatFrags<(ops node:$ptr),
+ [(extload node:$ptr),
+ (atomic_load node:$ptr)], [{
+ return getLoadExtType(N) == ISD::EXTLOAD;
+}]>;
+def z_extloadi8 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_extloadi16 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_extloadi32 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_extloadi64 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+ return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending atomic FP loads.
+def z_any_extloadf32 : PatFrags<(ops node:$ptr),
+ [(any_extloadf32 node:$ptr),
+ (any_fpextend (f32 (atomic_load node:$ptr)))]>;
+def z_any_extloadf64 : PatFrags<(ops node:$ptr),
+ [(any_extloadf64 node:$ptr),
+ (any_fpextend (f64 (atomic_load node:$ptr)))]>;
+
// Aligned loads.
class AlignedLoad<SDPatternOperator load>
: PatFrag<(ops node:$addr), (load node:$addr),
[{ return storeLoadIsAligned(N); }]>;
-def aligned_load : AlignedLoad<load>;
-def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
-def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
-def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
-def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
+def aligned_z_load : AlignedLoad<z_load>;
+def aligned_z_asextloadi16 : AlignedLoad<z_asextloadi16>;
+def aligned_z_asextloadi32 : AlignedLoad<z_asextloadi32>;
+def aligned_z_azextloadi16 : AlignedLoad<z_azextloadi16>;
+def aligned_z_azextloadi32 : AlignedLoad<z_azextloadi32>;
// Aligned stores.
class AlignedStore<SDPatternOperator store>
@@ -749,7 +847,7 @@ def z_any_vround : PatFrags<(ops node:$src),
// Create a unary operator that loads from memory and then performs
// the given operation on it.
-class loadu<SDPatternOperator operator, SDPatternOperator load = load>
+class loadu<SDPatternOperator operator, SDPatternOperator load = z_load>
: PatFrag<(ops node:$addr), (operator (load node:$addr))>;
// Create a store operator that performs the given unary operation
@@ -799,12 +897,12 @@ def imm32nobytes : PatLeaf<(i32 imm), [{
class z_replicate_load<ValueType scalartype, SDPatternOperator load>
: PatFrag<(ops node:$addr),
(z_replicate (scalartype (load node:$addr)))>;
-def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
-def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
-def z_replicate_loadi32 : z_replicate_load<i32, load>;
-def z_replicate_loadi64 : z_replicate_load<i64, load>;
-def z_replicate_loadf32 : z_replicate_load<f32, load>;
-def z_replicate_loadf64 : z_replicate_load<f64, load>;
+def z_replicate_loadi8 : z_replicate_load<i32, z_anyextloadi8>;
+def z_replicate_loadi16 : z_replicate_load<i32, z_anyextloadi16>;
+def z_replicate_loadi32 : z_replicate_load<i32, z_load>;
+def z_replicate_loadi64 : z_replicate_load<i64, z_load>;
+def z_replicate_loadf32 : z_replicate_load<f32, z_load>;
+def z_replicate_loadf64 : z_replicate_load<f64, z_load>;
// Byte-swapped replicated vector element loads.
def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
@@ -815,12 +913,12 @@ class z_vle<ValueType scalartype, SDPatternOperator load>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
(z_vector_insert node:$vec, (scalartype (load node:$addr)),
node:$index)>;
-def z_vlei8 : z_vle<i32, anyextloadi8>;
-def z_vlei16 : z_vle<i32, anyextloadi16>;
-def z_vlei32 : z_vle<i32, load>;
-def z_vlei64 : z_vle<i64, load>;
-def z_vlef32 : z_vle<f32, load>;
-def z_vlef64 : z_vle<f64, load>;
+def z_vlei8 : z_vle<i32, z_anyextloadi8>;
+def z_vlei16 : z_vle<i32, z_anyextloadi16>;
+def z_vlei32 : z_vle<i32, z_load>;
+def z_vlei64 : z_vle<i64, z_load>;
+def z_vlef32 : z_vle<f32, z_load>;
+def z_vlef64 : z_vle<f64, z_load>;
// Byte-swapped vector element loads.
def z_vlebri16 : z_vle<i32, z_loadbswap16>;
def z_vlebri32 : z_vle<i32, z_loadbswap32>;
@@ -832,13 +930,13 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
: PatFrag<(ops node:$addr),
(z_vector_insert immAllZerosV,
(scalartype (load node:$addr)), (i32 index))>;
-def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
-def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
-def z_vllezi32 : z_vllez<i32, load, 1>;
+def z_vllezi8 : z_vllez<i32, z_anyextloadi8, 7>;
+def z_vllezi16 : z_vllez<i32, z_anyextloadi16, 3>;
+def z_vllezi32 : z_vllez<i32, z_load, 1>;
def z_vllezi64 : PatFrags<(ops node:$addr),
[(z_vector_insert immAllZerosV,
- (i64 (load node:$addr)), (i32 0)),
- (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
+ (i64 (z_load node:$addr)), (i32 0)),
+ (z_join_dwords (i64 (z_load node:$addr)), (i64 0))]>;
// We use high merges to form a v4f32 from four f32s. Propagating zero
// into all elements but index 1 gives this expression.
def z_vllezf32 : PatFrag<(ops node:$addr),
@@ -848,23 +946,23 @@ def z_vllezf32 : PatFrag<(ops node:$addr),
(v4i32
(bitconvert
(v4f32 (scalar_to_vector
- (f32 (load node:$addr)))))))),
+ (f32 (z_load node:$addr)))))))),
(v2i64
(bitconvert (v4f32 immAllZerosV))))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
- (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
+ (v2f64 (scalar_to_vector (f64 (z_load node:$addr)))),
immAllZerosV)>;
// Similarly for the high element of a zeroed vector.
-def z_vllezli32 : z_vllez<i32, load, 0>;
+def z_vllezli32 : z_vllez<i32, z_load, 0>;
def z_vllezlf32 : PatFrag<(ops node:$addr),
(z_merge_high
(v2i64
(bitconvert
(z_merge_high
(v4f32 (scalar_to_vector
- (f32 (load node:$addr)))),
+ (f32 (z_load node:$addr)))),
(v4f32 immAllZerosV)))),
(v2i64
(bitconvert (v4f32 immAllZerosV))))>;
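
(Not part of the patch, just a sketch of how the new z_ fragments are meant to be consumed: an existing SystemZ load pattern keeps its shape and only swaps the operator, so the same instruction also covers the equivalent extending atomic load. LGH and bdxaddr20only are existing backend names; the standalone Pat form below is hypothetical, since the real definitions go through the instruction classes.)

// Sketch only: with z_asextloadi16 instead of asextloadi16, the same
// pattern now matches both a regular and an atomic sign-extending i16
// load (the PatFrags above accept LoadSDNode and AtomicSDNode alike via
// MemSDNode / getLoadExtType).
def : Pat<(i64 (z_asextloadi16 bdxaddr20only:$addr)),
          (LGH bdxaddr20only:$addr)>;
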
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 5e5dca77e9553b..4d6bc68e9a7edd 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -49,8 +49,8 @@ class RMWI<SDPatternOperator load, SDPatternOperator operator,
// memory location. IMM is the type of the second operand.
multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
Instruction insn> {
- def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
+ def : RMWI<z_anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<z_anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
}
// Record that INSN performs insertion TYPE into a register of class CLS.
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 1e548d7c101a7a..3bb4636e07b8d1 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1789,10 +1789,10 @@ defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, ST1Bzii>;
multiclass ATMLDm<SDPatternOperator from,
RM torri, RM torii,
RM tozri, RM tozii> {
- def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>;
- def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>;
- def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>;
- def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>;
+ def : Pat<(iAny (from ADDRrri:$addr)), (torri MEMrri:$addr)>;
+ def : Pat<(iAny (from ADDRrii:$addr)), (torii MEMrii:$addr)>;
+ def : Pat<(iAny (from ADDRzri:$addr)), (tozri MEMzri:$addr)>;
+ def : Pat<(iAny (from ADDRzii:$addr)), (tozii MEMzii:$addr)>;
}
defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
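
(For reference, a sketch of the first pattern that the atomic_load_8 instantiation of ATMLDm expands to after this change; the names are existing VE definitions. My reading is that the explicit iAny wrapper pins the atomic_load result to an integer type while the concrete width is still taken from the instruction's register class.)

// Sketch of one expansion of
//   defm : ATMLDm<atomic_load_8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
def : Pat<(iAny (atomic_load_8 ADDRrri:$addr)), (LD1BZXrri MEMrri:$addr)>;
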
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
index d75f15a574f7ef..60ff780df87b0e 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll
@@ -4,7 +4,7 @@
define float @f1(ptr %src) {
; CHECK-LABEL: f1:
-; CHECK: le %f0
+; CHECK: le %f0, 0(%r2)
; CHECK: br %r14
%val = load atomic float, ptr %src seq_cst, align 4
ret float %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll b/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll
new file mode 100644
index 00000000000000..8038329c0e09a0
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+;
+; Test fpext of atomic loads to fp128 without VectorEnhancements1 (using FP register pairs).
+
+define fp128 @f1(ptr %src) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxeb %f0, 0(%r3)
+; CHECK-NEXT: std %f0, 0(%r2)
+; CHECK-NEXT: std %f2, 8(%r2)
+; CHECK-NEXT: br %r14
+ %V = load atomic float, ptr %src seq_cst, align 4
+ %Res = fpext float %V to fp128
+ ret fp128 %Res
+}
+
+define fp128 @f2(ptr %src) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxdb %f0, 0(%r3)
+; CHECK-NEXT: std %f0, 0(%r2)
+; CHECK-NEXT: std %f2, 8(%r2)
+; CHECK-NEXT: br %r14
+ %V = load atomic double, ptr %src seq_cst, align 8
+ %Res = fpext double %V to fp128
+ ret fp128 %Res
+}
+
+
+
diff --git a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll b/llvm/test/CodeGen/SystemZ/atomic-memops.ll
similarity index 94%
rename from llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
rename to llvm/test/CodeGen/SystemZ/atomic-memops.ll
index fa1578df04bec1..9d799630ae8f8f 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-memops.ll
@@ -182,6 +182,41 @@ define float @f15(float %f1, ptr %ptr, float %acc) {
}
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
+define double @f15_b(ptr %src) {
+; CHECK-LABEL: f15_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ldeb %f0, 0(%r2)
+; CHECK-NEXT: br %r14
+ %V = load atomic float, ptr %src seq_cst, align 4
+ %Res = fpext float %V to double
+ ret double %Res
+}
+
+define fp128 @f15_c(ptr %src) {
+; CHECK-LABEL: f15_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lde %f0, 0(%r3)
+; CHECK-NEXT: ldebr %f0, %f0
+; CHECK-NEXT: wflld %v0, %f0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %V = load atomic float, ptr %src seq_cst, align 4
+ %Res = fpext float %V to fp128
+ ret fp128 %Res
+}
+
+define fp128 @f15_d(ptr %src) {
+; CHECK-LABEL: f15_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld %f0, 0(%r3)
+; CHECK-NEXT: wflld %v0, %f0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %V = load atomic double, ptr %src seq_cst, align 8
+ %Res = fpext double %V to fp128
+ ret fp128 %Res
+}
+
; Do it twice for good measure given the involved DAG combines.
define void @f16(ptr %src, ptr %dst) {
; CHECK-LABEL: f16:
@@ -299,10 +334,10 @@ define double @f20(ptr %src, double %a, double %b) {
; CHECK-LABEL: f20:
; CHECK: # %bb.0:
; CHECK-NEXT: tm 0(%r2), 1
-; CHECK-NEXT: je .LBB22_2
+; CHECK-NEXT: je .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ldr %f2, %f0
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: ldr %f0, %f2
; CHECK-NEXT: br %r14
%byte = load atomic i8, ptr %src seq_cst, align 1
@@ -641,7 +676,7 @@ define void @f43(ptr %ptr) {
define void @f44(ptr %ptr) {
; CHECK-LABEL: f44:
; CHECK: # %bb.0:
-; CHECK-NEXT: larl %r1, .LCPI50_0
+; CHECK-NEXT: larl %r1, .LCPI53_0
; CHECK-NEXT: ld %f0, 0(%r1)
; CHECK-NEXT: std %f0, 0(%r2)
; CHECK-NEXT: bcr 14, %r0
@@ -755,3 +790,14 @@ define void @f52(ptr %src, ptr %dst) {
store atomic i64 %b1, ptr %dst seq_cst, align 8
ret void
}
+
+define void @fun58(ptr %ptr, i64 %arg) {
+; CHECK-LABEL: fun58:
+; CHECK: # %bb.0:
+; CHECK-NEXT: st %r3, 0(%r2)
+; CHECK-NEXT: bcr 14, %r0
+; CHECK-NEXT: br %r14
+ %res = trunc i64 %arg to i32
+ store atomic i32 %res, ptr %ptr seq_cst, align 4
+ ret void
+}