[llvm] r365796 - [NVPTX] Use atomicrmw fadd instead of intrinsics
Benjamin Kramer via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 10:11:25 PDT 2019
Author: d0k
Date: Thu Jul 11 10:11:25 2019
New Revision: 365796
URL: http://llvm.org/viewvc/llvm-project?rev=365796&view=rev
Log:
[NVPTX] Use atomicrmw fadd instead of intrinsics
AutoUpgrade the old intrinsics to atomicrmw fadd.
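As a minimal sketch of the upgrade (placeholder names %r, %addr, %val; the seq_cst ordering mirrors the AtomicOrdering::SequentiallyConsistent used in the AutoUpgrade change below), a call to the old NVVM intrinsic such as

  %r = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)

is now rewritten into the equivalent generic IR instruction

  %r = atomicrmw fadd float* %addr, float %val seq_cst

so the backend only has to pattern-match atomic_load_fadd instead of the target-specific intrinsics.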
Modified:
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/trunk/test/CodeGen/NVPTX/atomics-sm60.ll
llvm/trunk/test/CodeGen/NVPTX/atomics.ll
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Jul 11 10:11:25 2019
@@ -764,6 +764,8 @@ static bool UpgradeIntrinsicFunction1(Fu
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
+ .StartsWith("atomic.load.add.f32.p", true)
+ .StartsWith("atomic.load.add.f64.p", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
@@ -3426,6 +3428,12 @@ void llvm::UpgradeIntrinsicCall(CallInst
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+ } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
+ Name.startswith("atomic.load.add.f64.p"))) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+ Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
+ AtomicOrdering::SequentiallyConsistent);
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Thu Jul 11 10:11:25 2019
@@ -3749,8 +3749,6 @@ bool NVPTXTargetLowering::getTgtMemIntri
return true;
}
- case Intrinsic::nvvm_atomic_load_add_f32:
- case Intrinsic::nvvm_atomic_load_add_f64:
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Thu Jul 11 10:11:25 2019
@@ -1134,18 +1134,12 @@ def atomic_load_add_64_s: ATOMIC_SHARED_
(atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_add_64 node:$a, node:$b)>;
-def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
atomic_load_add_32_g, i32imm, imm>;
@@ -1166,18 +1160,18 @@ defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_A
".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
- atomic_load_add_f32_g, f32imm, fpimm>;
+ atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
- atomic_load_add_f32_s, f32imm, fpimm>;
+ atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
- atomic_load_add_f32_gen, f32imm, fpimm>;
+ atomic_load_add_gen, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
- atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
- atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
- atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// atom_sub
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp Thu Jul 11 10:11:25 2019
@@ -38,7 +38,6 @@ static bool readsLaneId(const IntrinsicI
static bool isNVVMAtomic(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default: return false;
- case Intrinsic::nvvm_atomic_load_add_f32:
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
Modified: llvm/trunk/test/CodeGen/NVPTX/atomics-sm60.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/atomics-sm60.ll?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/atomics-sm60.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/atomics-sm60.ll Thu Jul 11 10:11:25 2019
@@ -12,6 +12,17 @@ define void @test(double* %dp0, double a
ret void
}
+; CHECK-LABEL: .func test2(
+define void @test2(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+; CHECK: atom.add.f64
+ %r1 = atomicrmw fadd double* %dp0, double %d seq_cst
+; CHECK: atom.global.add.f64
+ %r2 = atomicrmw fadd double addrspace(1)* %dp1, double %d seq_cst
+; CHECK: atom.shared.add.f64
+ %ret = atomicrmw fadd double addrspace(3)* %dp3, double %d seq_cst
+ ret void
+}
+
declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
Modified: llvm/trunk/test/CodeGen/NVPTX/atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/atomics.ll?rev=365796&r1=365795&r2=365796&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/atomics.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/atomics.ll Thu Jul 11 10:11:25 2019
@@ -167,6 +167,27 @@ define float @atomic_add_f32_addrspace3(
ret float %ret
}
+; CHECK-LABEL: atomicrmw_add_f32_generic
+define float @atomicrmw_add_f32_generic(float* %addr, float %val) {
+; CHECK: atom.add.f32
+ %ret = atomicrmw fadd float* %addr, float %val seq_cst
+ ret float %ret
+}
+
+; CHECK-LABEL: atomicrmw_add_f32_addrspace1
+define float @atomicrmw_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
+; CHECK: atom.global.add.f32
+ %ret = atomicrmw fadd float addrspace(1)* %addr, float %val seq_cst
+ ret float %ret
+}
+
+; CHECK-LABEL: atomicrmw_add_f32_addrspace3
+define float @atomicrmw_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
+; CHECK: atom.shared.add.f32
+ %ret = atomicrmw fadd float addrspace(3)* %addr, float %val seq_cst
+ ret float %ret
+}
+
; CHECK-LABEL: atomic_cmpxchg_i32
define i32 @atomic_cmpxchg_i32(i32* %addr, i32 %cmp, i32 %new) {
; CHECK: atom.cas.b32