[llvm] 9fdd258 - Transforms: Fix code duplication between LowerAtomic and AtomicExpand
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 8 16:06:41 PDT 2022
Author: Matt Arsenault
Date: 2022-04-08T19:06:36-04:00
New Revision: 9fdd25848a79892eaacc5414d5aef18555b79919
URL: https://github.com/llvm/llvm-project/commit/9fdd25848a79892eaacc5414d5aef18555b79919
DIFF: https://github.com/llvm/llvm-project/commit/9fdd25848a79892eaacc5414d5aef18555b79919.diff
LOG: Transforms: Fix code duplication between LowerAtomic and AtomicExpand
Added:
Modified:
llvm/include/llvm/Transforms/Utils/LowerAtomic.h
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/lib/Transforms/Utils/LowerAtomic.cpp
llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
index cd16d77d50f93..c85f8e3a56461 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
@@ -14,9 +14,11 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+#include "llvm/IR/Instructions.h"
+
namespace llvm {
-class AtomicCmpXchgInst;
-class AtomicRMWInst;
+
+class IRBuilderBase;
/// Convert the given Cmpxchg into primitive load and compare.
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
@@ -25,6 +27,11 @@ bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
/// assuming that doing so is legal. Return true if the lowering
/// succeeds.
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder,
+ Value *Loaded, Value *Inc);
}
#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
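
For context, the helper declared above maps an atomicrmw opcode plus two register values to the IR computing the updated value; it emits no memory operations itself. A minimal sketch of a direct call site, assuming a caller that already has a builder positioned where the arithmetic should go (the wrapper and its name are illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"

using namespace llvm;

// Emit the register-level update for a signed max: an icmp sgt of the
// loaded value against the operand, then a select named "new".
static Value *emitMaxUpdate(IRBuilderBase &Builder, Value *Loaded,
                            Value *Inc) {
  return buildAtomicRMWValue(AtomicRMWInst::Max, Builder, Loaded, Inc);
}
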
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index dbf49fc53e10c..fc1f832cf99e5 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -544,47 +544,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
- Value *Loaded, Value *Inc) {
- Value *NewVal;
- switch (Op) {
- case AtomicRMWInst::Xchg:
- return Inc;
- case AtomicRMWInst::Add:
- return Builder.CreateAdd(Loaded, Inc, "new");
- case AtomicRMWInst::Sub:
- return Builder.CreateSub(Loaded, Inc, "new");
- case AtomicRMWInst::And:
- return Builder.CreateAnd(Loaded, Inc, "new");
- case AtomicRMWInst::Nand:
- return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
- case AtomicRMWInst::Or:
- return Builder.CreateOr(Loaded, Inc, "new");
- case AtomicRMWInst::Xor:
- return Builder.CreateXor(Loaded, Inc, "new");
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::FAdd:
- return Builder.CreateFAdd(Loaded, Inc, "new");
- case AtomicRMWInst::FSub:
- return Builder.CreateFSub(Loaded, Inc, "new");
- default:
- llvm_unreachable("Unknown atomic op");
- }
-}
-
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
LLVMContext &Ctx = AI->getModule()->getContext();
TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
@@ -599,8 +558,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
};
expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
AI->getAlign(), AI->getOrdering(), PerformOp);
@@ -810,7 +769,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::Sub:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
- Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
@@ -824,7 +783,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
// truncate down to the original size, and expand out again after
// doing the operation.
Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
- Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
return FinalVal;
}
@@ -1558,8 +1517,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
[&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
},
CreateCmpXchg);
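
Both expansion strategies touched above (LL/SC and the cmpxchg loop) consume the update step as a callback mapping the loaded value to the value to store, and after this change every callback body is the shared helper. A sketch of that shape, assuming std::function in place of whatever callable type the drivers actually accept:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <functional>

using namespace llvm;

// Build the "PerformOp" callback handed to the expansion drivers: given
// the value observed in memory, produce the value to attempt to store.
static std::function<Value *(IRBuilder<> &, Value *)>
makePerformOp(AtomicRMWInst *AI) {
  return [AI](IRBuilder<> &Builder, Value *Loaded) {
    return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
  };
}
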
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index 5963b47923032..8641581c80392 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -39,60 +39,53 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
return true;
}
-bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI);
- Value *Ptr = RMWI->getPointerOperand();
- Value *Val = RMWI->getValOperand();
-
- LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
- Value *Res = nullptr;
-
- switch (RMWI->getOperation()) {
- default: llvm_unreachable("Unexpected RMW operation");
+Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
+ IRBuilderBase &Builder, Value *Loaded,
+ Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
case AtomicRMWInst::Xchg:
- Res = Val;
- break;
+ return Inc;
case AtomicRMWInst::Add:
- Res = Builder.CreateAdd(Orig, Val);
- break;
+ return Builder.CreateAdd(Loaded, Inc, "new");
case AtomicRMWInst::Sub:
- Res = Builder.CreateSub(Orig, Val);
- break;
+ return Builder.CreateSub(Loaded, Inc, "new");
case AtomicRMWInst::And:
- Res = Builder.CreateAnd(Orig, Val);
- break;
+ return Builder.CreateAnd(Loaded, Inc, "new");
case AtomicRMWInst::Nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
- break;
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
case AtomicRMWInst::Or:
- Res = Builder.CreateOr(Orig, Val);
- break;
+ return Builder.CreateOr(Loaded, Inc, "new");
case AtomicRMWInst::Xor:
- Res = Builder.CreateXor(Orig, Val);
- break;
+ return Builder.CreateXor(Loaded, Inc, "new");
case AtomicRMWInst::Max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Val, Orig);
- break;
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
case AtomicRMWInst::Min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Orig, Val);
- break;
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
case AtomicRMWInst::UMax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Val, Orig);
- break;
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
case AtomicRMWInst::UMin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Orig, Val);
- break;
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
case AtomicRMWInst::FAdd:
- Res = Builder.CreateFAdd(Orig, Val);
- break;
+ return Builder.CreateFAdd(Loaded, Inc, "new");
case AtomicRMWInst::FSub:
- Res = Builder.CreateFSub(Orig, Val);
- break;
+ return Builder.CreateFSub(Loaded, Inc, "new");
+ default:
+ llvm_unreachable("Unknown atomic op");
}
+}
+
+bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+ Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val);
Builder.CreateStore(Res, Ptr);
RMWI->replaceAllUsesWith(Orig);
RMWI->eraseFromParent();
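
With the value computation factored out, lowerAtomicRMWInst above reduces to load, shared update, store, and replacing uses with the old value. A minimal sketch of driving it over a function, as the NVPTX and AMDGPU lowerings do for address spaces where atomicity is unnecessary (the loop scaffolding here is illustrative, not part of the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"

using namespace llvm;

// Lower every atomicrmw in F to a plain load/compute/store sequence.
// Early-increment iteration keeps the walk valid while instructions
// are erased out from under it.
static bool lowerAllAtomicRMW(Function &F) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    for (Instruction &I : make_early_inc_range(BB))
      if (auto *RMWI = dyn_cast<AtomicRMWInst>(&I))
        Changed |= lowerAtomicRMWInst(RMWI);
  return Changed;
}

The test updates below are mechanical fallout: the stored value is now named "new", and the min/max lowerings emit sgt/sle/ugt/ule comparisons that select the loaded value first instead of the old slt/ult forms, choosing the same value as before.
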
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index ff4ccfc60ad19..350915097ec52 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -196,8 +196,8 @@ define i32 @atomicrmw_xchg_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_add_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 4
-; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP1]], 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_add_private_i32:
@@ -217,8 +217,8 @@ define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_sub_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 4
-; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = sub i32 [[TMP1]], 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_sub_private_i32:
@@ -238,8 +238,8 @@ define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_and_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_and_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 4
-; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = and i32 [[TMP1]], 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_and_private_i32:
@@ -260,8 +260,8 @@ define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_nand_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
; IR-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 4
-; IR-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1
-; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_nand_private_i32:
@@ -282,8 +282,8 @@ define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_or_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 4
-; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = or i32 [[TMP1]], 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_or_private_i32:
@@ -303,8 +303,8 @@ define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_xor_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 4
-; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP1]], 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_xor_private_i32:
@@ -324,9 +324,9 @@ define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_max_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 4
-; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 4, i32 [[TMP1]]
-; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
+; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_max_private_i32:
@@ -346,9 +346,9 @@ define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_min_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 4
-; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
-; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
+; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_min_private_i32:
@@ -368,9 +368,9 @@ define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_umax_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 4
-; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 4, i32 [[TMP1]]
-; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
+; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_umax_private_i32:
@@ -390,9 +390,9 @@ define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) {
define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_umin_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 4
-; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
-; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
+; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_umin_private_i32:
@@ -412,8 +412,8 @@ define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) {
define float @atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) {
; IR-LABEL: @atomicrmw_fadd_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
-; IR-NEXT: store float [[TMP2]], float addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; IR-NEXT: store float [[NEW]], float addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret float [[TMP1]]
;
; GCN-LABEL: atomicrmw_fadd_private_i32:
@@ -433,8 +433,8 @@ define float @atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) {
define float @atomicrmw_fsub_private_i32(float addrspace(5)* %ptr, float %val) {
; IR-LABEL: @atomicrmw_fsub_private_i32(
; IR-NEXT: [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT: [[TMP2:%.*]] = fsub float [[TMP1]], [[VAL:%.*]]
-; IR-NEXT: store float [[TMP2]], float addrspace(5)* [[PTR]], align 4
+; IR-NEXT: [[NEW:%.*]] = fsub float [[TMP1]], [[VAL:%.*]]
+; IR-NEXT: store float [[NEW]], float addrspace(5)* [[PTR]], align 4
; IR-NEXT: ret float [[TMP1]]
;
; GCN-LABEL: atomicrmw_fsub_private_i32:
@@ -461,8 +461,8 @@ define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(i32 addr
; IR-NEXT: store i32 1, i32 addrspace(5)* [[GEP2]], align 4
; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]]
; IR-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4
-; IR-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 7
-; IR-NEXT: store i32 [[TMP1]], i32 addrspace(5)* [[GEP3]], align 4
+; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP0]], 7
+; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[GEP3]], align 4
; IR-NEXT: store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
; IR-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
index a041dcbdca05c..07694a91d3617 100644
--- a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
+++ b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
@@ -11,8 +11,8 @@ define double @kernel(double addrspace(5)* %ptr, double %val) {
%res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
ret double %res
; CHECK: %1 = load double, double addrspace(5)* %ptr, align 8
-; CHECK-NEXT: %2 = fadd double %1, %val
-; CHECK-NEXT: store double %2, double addrspace(5)* %ptr, align 8
+; CHECK-NEXT: %new = fadd double %1, %val
+; CHECK-NEXT: store double %new, double addrspace(5)* %ptr, align 8
; CHECK-NEXT: ret double %1
}