[llvm] 52f03fe - [VE] Support atomic fence
Kazushi Marukawa via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 26 01:03:17 PDT 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-10-26T17:03:09+09:00
New Revision: 52f03fe1151f65278c855651bb8f325cca8500ea
URL: https://github.com/llvm/llvm-project/commit/52f03fe1151f65278c855651bb8f325cca8500ea
DIFF: https://github.com/llvm/llvm-project/commit/52f03fe1151f65278c855651bb8f325cca8500ea.diff
LOG: [VE] Support atomic fence
Support atomic fence instruction and add a regression test.
Add MEMBARRIER pseudo instruction also to use it as a barrier
against the compiler optimizations.
Reviewed By: simoll
Differential Revision: https://reviews.llvm.org/D90112
Added:
llvm/test/CodeGen/VE/atomic_fence.ll
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 150db3ab6446..115540bb862f 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -817,6 +817,17 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
/// } Floating-point math functions
+ /// Atomic instructions {
+
+ setMaxAtomicSizeInBitsSupported(64);
+ setMinCmpXchgSizeInBits(32);
+ setSupportsUnalignedAtomics(false);
+
+ // Use custom inserter for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ /// } Atomic instructions
+
setStackPointerRegisterToSaveRestore(VE::SX11);
// We have target-specific dag combine patterns for the following nodes:
@@ -843,6 +854,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(GETFUNPLT)
TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
+ TARGET_NODE_CASE(MEMBARRIER)
TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -945,6 +957,51 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
/// Custom Lower {
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+ SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+ // VE uses Release consistency, so need a fence instruction if it is a
+ // cross-thread fence.
+ if (FenceSSID == SyncScope::System) {
+ switch (FenceOrdering) {
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ // No need to generate fencem instruction here.
+ break;
+ case AtomicOrdering::Acquire:
+ // Generate "fencem 2" as acquire fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(2, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::Release:
+ // Generate "fencem 1" as release fence.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(1, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ // Generate "fencem 3" as acq_rel and seq_cst fence.
+ // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+ // so seq_cst may require more instruction for them.
+ return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+ DAG.getTargetConstant(3, DL, MVT::i32),
+ Op.getOperand(0)),
+ 0);
+ }
+ }
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
@@ -1263,6 +1320,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Should not custom lower this!");
+ case ISD::ATOMIC_FENCE:
+ return lowerATOMIC_FENCE(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 19c739eb8b40..d0fea0107594 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -32,6 +32,8 @@ enum NodeType : unsigned {
GETSTACKTOP, // retrieve address of stack top (first address of
// locals and temporaries)
+ MEMBARRIER, // Compiler barrier only; generate a no-op.
+
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,7 @@ class VETargetLowering : public TargetLowering {
/// Custom Lower {
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 65dbb68ce176..05e2a86e4d22 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -442,6 +442,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+// MEMBARRIER
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
//===----------------------------------------------------------------------===//
// VE Flag Conditions
@@ -1782,6 +1785,14 @@ def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
"# GET STACK TOP",
[(set iPTR:$dst, (GetStackTop))]>;
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs
diff --git a/llvm/test/CodeGen/VE/atomic_fence.ll b/llvm/test/CodeGen/VE/atomic_fence.ll
new file mode 100644
index 000000000000..0835e6b6e53a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/atomic_fence.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic fence for all memory orders
+
+; Function Attrs: norecurse nounwind readnone
+define void @_Z20atomic_fence_relaxedv() {
+; CHECK-LABEL: _Z20atomic_fence_relaxedv:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s11, 0, %s9
+ ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_consumev() {
+; CHECK-LABEL: _Z20atomic_fence_consumev:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: fencem 2
+; CHECK-NEXT: or %s11, 0, %s9
+ fence acquire
+ ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acquirev() {
+; CHECK-LABEL: _Z20atomic_fence_acquirev:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: fencem 2
+; CHECK-NEXT: or %s11, 0, %s9
+ fence acquire
+ ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_releasev() {
+; CHECK-LABEL: _Z20atomic_fence_releasev:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: fencem 1
+; CHECK-NEXT: or %s11, 0, %s9
+ fence release
+ ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acq_relv() {
+; CHECK-LABEL: _Z20atomic_fence_acq_relv:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: or %s11, 0, %s9
+ fence acq_rel
+ ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_seq_cstv() {
+; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: fencem 3
+; CHECK-NEXT: or %s11, 0, %s9
+ fence seq_cst
+ ret void
+}
More information about the llvm-commits
mailing list