[llvm] ecf5b78 - [NVPTX] Enable AtomicExpandPass for NVPTX
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri May 20 14:25:31 PDT 2022
Author: Shilei Tian
Date: 2022-05-20T17:25:28-04:00
New Revision: ecf5b780538ecb643462fcb522440ea65d82483c
URL: https://github.com/llvm/llvm-project/commit/ecf5b780538ecb643462fcb522440ea65d82483c
DIFF: https://github.com/llvm/llvm-project/commit/ecf5b780538ecb643462fcb522440ea65d82483c.diff
LOG: [NVPTX] Enable AtomicExpandPass for NVPTX
This patch enables `AtomicExpandPass` for NVPTX.
Depends on D125652.
Reviewed By: tra
Differential Revision: https://reviews.llvm.org/D125639
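For a rough sense of what the pass does here (a minimal sketch, not part of the commit; block and value names are illustrative): any atomicrmw that shouldExpandAtomicRMWInIR reports as AtomicExpansionKind::CmpXChg is rewritten by AtomicExpandPass into a compare-and-swap loop in IR before instruction selection. For example, a 64-bit `and` on an sm_30 target, where the native atomic is unavailable, expands roughly like this:

  ; input:  %old = atomicrmw and ptr %p, i64 %v monotonic
  ; after AtomicExpandPass (sketch; names are illustrative):
  entry:
    %init = load i64, ptr %p, align 8
    br label %atomicrmw.start
  atomicrmw.start:
    %loaded = phi i64 [ %init, %entry ], [ %new.loaded, %atomicrmw.start ]
    %newval = and i64 %loaded, %v
    %pair = cmpxchg ptr %p, i64 %loaded, i64 %newval monotonic monotonic
    %new.loaded = extractvalue { i64, i1 } %pair, 0
    %success = extractvalue { i64, i1 } %pair, 1
    br i1 %success, label %atomicrmw.end, label %atomicrmw.start
  atomicrmw.end:
    ; %loaded holds the original value of the atomicrmw

The resulting cmpxchg then selects to atom.cas.b64, which is what the SM30 checks in the new test expect.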
Added:
llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3c357a440a324..f4afab300faa4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -589,6 +589,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties(STI.getRegisterInfo());
+
+ setMinCmpXchgSizeInBits(32);
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -5125,6 +5127,67 @@ void NVPTXTargetLowering::ReplaceNodeResults(
}
}
+NVPTXTargetLowering::AtomicExpansionKind
+NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ Type *Ty = AI->getValOperand()->getType();
+
+ if (AI->isFloatingPointOperation()) {
+ if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
+ if (Ty->isFloatTy())
+ return AtomicExpansionKind::None;
+ if (Ty->isDoubleTy() && STI.hasAtomAddF64())
+ return AtomicExpansionKind::None;
+ }
+ return AtomicExpansionKind::CmpXChg;
+ }
+
+ assert(Ty->isIntegerTy() && "Ty should be integer at this point");
+ auto ITy = cast<llvm::IntegerType>(Ty);
+
+ switch (AI->getOperation()) {
+ default:
+ return AtomicExpansionKind::CmpXChg;
+ case AtomicRMWInst::BinOp::And:
+ case AtomicRMWInst::BinOp::Or:
+ case AtomicRMWInst::BinOp::Xor:
+ case AtomicRMWInst::BinOp::Xchg:
+ switch (ITy->getBitWidth()) {
+ case 8:
+ case 16:
+ return AtomicExpansionKind::CmpXChg;
+ case 32:
+ return AtomicExpansionKind::None;
+ case 64:
+ if (STI.hasAtomBitwise64())
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ llvm_unreachable("unsupported width encountered");
+ }
+ case AtomicRMWInst::BinOp::Add:
+ case AtomicRMWInst::BinOp::Sub:
+ case AtomicRMWInst::BinOp::Max:
+ case AtomicRMWInst::BinOp::Min:
+ case AtomicRMWInst::BinOp::UMax:
+ case AtomicRMWInst::BinOp::UMin:
+ switch (ITy->getBitWidth()) {
+ case 8:
+ case 16:
+ return AtomicExpansionKind::CmpXChg;
+ case 32:
+ return AtomicExpansionKind::None;
+ case 64:
+ if (STI.hasAtomMinMax64())
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ llvm_unreachable("unsupported width encountered");
+ }
+ }
+
+ return AtomicExpansionKind::CmpXChg;
+}
+
// Pin NVPTXTargetObjectFile's vtables to this file.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 18a697deacb44..fb09f99a019d0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -561,6 +561,17 @@ class NVPTXTargetLowering : public TargetLowering {
// instruction, so we say that ctlz is cheap to speculate.
bool isCheapToSpeculateCtlz() const override { return true; }
+ AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
+ return AtomicExpansionKind::None;
+ }
+
+ AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
+ return AtomicExpansionKind::None;
+ }
+
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7285bbe63d8e3..597b8af176a2a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -330,6 +330,8 @@ void NVPTXPassConfig::addIRPasses() {
addStraightLineScalarOptimizationPasses();
}
+ addPass(createAtomicExpandPass());
+
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
// EarlyCSE is not always strong enough to clean up what LSR produces. For
diff --git a/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
new file mode 100644
index 0000000000000..a2512dfbf5811
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomicrmw-expand.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
+
+; CHECK-LABEL: fadd_double
+define void @fadd_double(ptr %0, double %1) {
+entry:
+ ; SM30: atom.cas.b64
+ ; SM60: atom.add.f64
+ %2 = atomicrmw fadd ptr %0, double %1 monotonic, align 8
+ ret void
+}
+
+; CHECK-LABEL: fadd_float
+define void @fadd_float(ptr %0, float %1) {
+entry:
+ ; ALL: atom.add.f32
+ %2 = atomicrmw fadd ptr %0, float %1 monotonic, align 4
+ ret void
+}
+
+; CHECK-LABEL: bitwise_i32
+define void @bitwise_i32(ptr %0, i32 %1) {
+entry:
+ ; ALL: atom.and.b32
+ %2 = atomicrmw and ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.or.b32
+ %3 = atomicrmw or ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.xor.b32
+ %4 = atomicrmw xor ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.exch.b32
+ %5 = atomicrmw xchg ptr %0, i32 %1 monotonic, align 4
+ ret void
+}
+
+; CHECK-LABEL: bitwise_i64
+define void @bitwise_i64(ptr %0, i64 %1) {
+entry:
+ ; SM30: atom.cas.b64
+ ; SM60: atom.and.b64
+ %2 = atomicrmw and ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.or.b64
+ %3 = atomicrmw or ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.xor.b64
+ %4 = atomicrmw xor ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.exch.b64
+ %5 = atomicrmw xchg ptr %0, i64 %1 monotonic, align 8
+ ret void
+}
+
+; CHECK-LABEL: minmax_i32
+define void @minmax_i32(ptr %0, i32 %1) {
+entry:
+ ; ALL: atom.min.s32
+ %2 = atomicrmw min ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.max.s32
+ %3 = atomicrmw max ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.min.u32
+ %4 = atomicrmw umin ptr %0, i32 %1 monotonic, align 4
+ ; ALL: atom.max.u32
+ %5 = atomicrmw umax ptr %0, i32 %1 monotonic, align 4
+ ret void
+}
+
+; CHECK-LABEL: minmax_i64
+define void @minmax_i64(ptr %0, i64 %1) {
+entry:
+ ; SM30: atom.cas.b64
+ ; SM60: atom.min.s64
+ %2 = atomicrmw min ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.max.s64
+ %3 = atomicrmw max ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.min.u64
+ %4 = atomicrmw umin ptr %0, i64 %1 monotonic, align 8
+ ; SM30: atom.cas.b64
+ ; SM60: atom.max.u64
+ %5 = atomicrmw umax ptr %0, i64 %1 monotonic, align 8
+ ret void
+}
+
+; CHECK-LABEL: bitwise_i8
+define void @bitwise_i8(ptr %0, i8 %1) {
+entry:
+ ; ALL: atom.and.b32
+ %2 = atomicrmw and ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.or.b32
+ %3 = atomicrmw or ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.xor.b32
+ %4 = atomicrmw xor ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.cas.b32
+ %5 = atomicrmw xchg ptr %0, i8 %1 monotonic, align 1
+ ret void
+}
+
+; CHECK-LABEL: minmax_i8
+define void @minmax_i8(ptr %0, i8 %1) {
+entry:
+ ; ALL: atom.cas.b32
+ %2 = atomicrmw min ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.cas.b32
+ %3 = atomicrmw max ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.cas.b32
+ %4 = atomicrmw umin ptr %0, i8 %1 monotonic, align 1
+ ; ALL: atom.cas.b32
+ %5 = atomicrmw umax ptr %0, i8 %1 monotonic, align 1
+ ret void
+}
+
+; CHECK-LABEL: bitwise_i16
+define void @bitwise_i16(ptr %0, i16 %1) {
+entry:
+ ; ALL: atom.and.b32
+ %2 = atomicrmw and ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.or.b32
+ %3 = atomicrmw or ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.xor.b32
+ %4 = atomicrmw xor ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.cas.b32
+ %5 = atomicrmw xchg ptr %0, i16 %1 monotonic, align 2
+ ret void
+}
+
+; CHECK-LABEL: minmax_i16
+define void @minmax_i16(ptr %0, i16 %1) {
+entry:
+ ; ALL: atom.cas.b32
+ %2 = atomicrmw min ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.cas.b32
+ %3 = atomicrmw max ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.cas.b32
+ %4 = atomicrmw umin ptr %0, i16 %1 monotonic, align 2
+ ; ALL: atom.cas.b32
+ %5 = atomicrmw umax ptr %0, i16 %1 monotonic, align 2
+ ret void
+}
+
+; TODO: We would still like to test other types, such as i128, but the backend
+; currently doesn't support them. AtomicExpandPass only expands an atomicrmw to
+; a cmpxchg of the same bit width, so even after expansion the backend still
+; cannot lower the instruction. The tests are kept below, commented out; enable
+; them once proper support lands, either in AtomicExpandPass or in the backend.
+
+; define void @bitwise_i128(ptr %0, i128 %1) {
+; entry:
+; %2 = atomicrmw and ptr %0, i128 %1 monotonic, align 16
+; %3 = atomicrmw or ptr %0, i128 %1 monotonic, align 16
+; %4 = atomicrmw xor ptr %0, i128 %1 monotonic, align 16
+; %5 = atomicrmw xchg ptr %0, i128 %1 monotonic, align 16
+; ret void
+; }
+
+; define void @minmax_i128(ptr %0, i128 %1) {
+; entry:
+; %2 = atomicrmw min ptr %0, i128 %1 monotonic, align 16
+; %3 = atomicrmw max ptr %0, i128 %1 monotonic, align 16
+; %4 = atomicrmw umin ptr %0, i128 %1 monotonic, align 16
+; %5 = atomicrmw umax ptr %0, i128 %1 monotonic, align 16
+; ret void
+; }