[llvm] 52f03fe - [VE] Support atomic fence

Kazushi Marukawa via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 26 01:03:17 PDT 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-10-26T17:03:09+09:00
New Revision: 52f03fe1151f65278c855651bb8f325cca8500ea

URL: https://github.com/llvm/llvm-project/commit/52f03fe1151f65278c855651bb8f325cca8500ea
DIFF: https://github.com/llvm/llvm-project/commit/52f03fe1151f65278c855651bb8f325cca8500ea.diff

LOG: [VE] Support atomic fence

Support atomic fence instruction and add a regression test.
Also add a MEMBARRIER pseudo instruction to use as a barrier
against compiler optimizations.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90112

Added: 
    llvm/test/CodeGen/VE/atomic_fence.ll

Modified: 
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 150db3ab6446..115540bb862f 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -817,6 +817,17 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
 
   /// } Floating-point math functions
 
+  /// Atomic instructions {
+
+  setMaxAtomicSizeInBitsSupported(64);
+  setMinCmpXchgSizeInBits(32);
+  setSupportsUnalignedAtomics(false);
+
+  // Use custom lowering for ATOMIC_FENCE.
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  /// } Atomic instructions
+
   setStackPointerRegisterToSaveRestore(VE::SX11);
 
   // We have target-specific dag combine patterns for the following nodes:
@@ -843,6 +854,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(GETFUNPLT)
     TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
+    TARGET_NODE_CASE(MEMBARRIER)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -945,6 +957,51 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
 
 /// Custom Lower {
 
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // VE uses release consistency, so a cross-thread (system-scope) fence needs
+  // a real fencem instruction; its immediate is chosen from the ordering.
+  if (FenceSSID == SyncScope::System) {
+    switch (FenceOrdering) {
+    case AtomicOrdering::NotAtomic:
+    case AtomicOrdering::Unordered:
+    case AtomicOrdering::Monotonic:
+      // No fencem is needed; fall out to the compiler barrier below.
+      break;
+    case AtomicOrdering::Acquire:
+      // Generate "fencem 2" as the acquire fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(2, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::Release:
+      // Generate "fencem 1" as the release fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(1, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      // Generate "fencem 3" as the acq_rel and seq_cst fence.
+      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+      //        so seq_cst may require more instructions for them.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(3, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    }
+  }
+
+  // Otherwise emit MEMBARRIER: a compiler-only barrier that is a no-op.
+  return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   return makeAddress(Op, DAG);
@@ -1263,6 +1320,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 19c739eb8b40..d0fea0107594 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -32,6 +32,8 @@ enum NodeType : unsigned {
   GETSTACKTOP, // retrieve address of stack top (first address of
                // locals and temporaries)
 
+  MEMBARRIER, // Compiler barrier only; generate a no-op.
+
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,7 @@ class VETargetLowering : public TargetLowering {
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
+  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 65dbb68ce176..05e2a86e4d22 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -442,6 +442,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
 def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
                         [SDNPHasChain, SDNPSideEffect]>;
 
+// MEMBARRIER - chained, side-effecting DAG node for VEISD::MEMBARRIER.
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+                        [SDNPHasChain, SDNPSideEffect]>;
 
 //===----------------------------------------------------------------------===//
 // VE Flag Conditions
@@ -1782,6 +1785,14 @@ def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
                          "# GET STACK TOP",
                          [(set iPTR:$dst, (GetStackTop))]>;
 
+// MEMBARRIER - pseudo matched from MemBarrier; its asm text is a comment.
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
 // SETCC pattern matches
 //
 //   CMP  %tmp, lhs, rhs     ; compare lhs and rhs

diff  --git a/llvm/test/CodeGen/VE/atomic_fence.ll b/llvm/test/CodeGen/VE/atomic_fence.ll
new file mode 100644
index 000000000000..0835e6b6e53a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/atomic_fence.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic fence for all memory orders
+
+; Function Attrs: norecurse nounwind readnone
+define void @_Z20atomic_fence_relaxedv() {
+; CHECK-LABEL: _Z20atomic_fence_relaxedv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_consumev() {
+; CHECK-LABEL: _Z20atomic_fence_consumev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acquirev() {
+; CHECK-LABEL: _Z20atomic_fence_acquirev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_releasev() {
+; CHECK-LABEL: _Z20atomic_fence_releasev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 1
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence release
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acq_relv() {
+; CHECK-LABEL: _Z20atomic_fence_acq_relv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acq_rel
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_seq_cstv() {
+; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence seq_cst
+  ret void
+}


        


More information about the llvm-commits mailing list