[llvm] 9fdd258 - Transforms: Fix code duplication between LowerAtomic and AtomicExpand

Matt Arsenault via llvm-commits <llvm-commits at lists.llvm.org>
Fri Apr 8 16:06:41 PDT 2022


Author: Matt Arsenault
Date: 2022-04-08T19:06:36-04:00
New Revision: 9fdd25848a79892eaacc5414d5aef18555b79919

URL: https://github.com/llvm/llvm-project/commit/9fdd25848a79892eaacc5414d5aef18555b79919
DIFF: https://github.com/llvm/llvm-project/commit/9fdd25848a79892eaacc5414d5aef18555b79919.diff

LOG: Transforms: Fix code duplication between LowerAtomic and AtomicExpand

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/LowerAtomic.h
    llvm/lib/CodeGen/AtomicExpandPass.cpp
    llvm/lib/Transforms/Utils/LowerAtomic.cpp
    llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
    llvm/test/CodeGen/NVPTX/atomic-lower-local.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
index cd16d77d50f93..c85f8e3a56461 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
@@ -14,9 +14,11 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
 #define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
 
+#include "llvm/IR/Instructions.h"
+
 namespace llvm {
-class AtomicCmpXchgInst;
-class AtomicRMWInst;
+
+class IRBuilderBase;
 
 /// Convert the given Cmpxchg into primitive load and compare.
 bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
@@ -25,6 +27,11 @@ bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
 /// assuming that doing so is legal. Return true if the lowering
 /// succeeds.
 bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder,
+                           Value *Loaded, Value *Inc);
 }
 
 #endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H

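The declaration above makes the helper a public utility, so a pass that already has the loaded value in a register can reuse it instead of re-implementing the per-opcode switch. A minimal sketch of a caller follows (computeNewValue is a hypothetical name; buildAtomicRMWValue, IRBuilder, and the AtomicRMWInst accessors are taken from the patch):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/LowerAtomic.h"

  using namespace llvm;

  // Given an atomicrmw and the value already loaded from its pointer
  // operand, compute the value to store back. IRBuilder<> derives from
  // IRBuilderBase, so it binds to the helper's parameter directly.
  static Value *computeNewValue(AtomicRMWInst *RMWI, Value *Loaded,
                                IRBuilder<> &Builder) {
    return buildAtomicRMWValue(RMWI->getOperation(), Builder, Loaded,
                               RMWI->getValOperand());
  }
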
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index dbf49fc53e10c..fc1f832cf99e5 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -544,47 +544,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
     NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
 }
 
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
-                              Value *Loaded, Value *Inc) {
-  Value *NewVal;
-  switch (Op) {
-  case AtomicRMWInst::Xchg:
-    return Inc;
-  case AtomicRMWInst::Add:
-    return Builder.CreateAdd(Loaded, Inc, "new");
-  case AtomicRMWInst::Sub:
-    return Builder.CreateSub(Loaded, Inc, "new");
-  case AtomicRMWInst::And:
-    return Builder.CreateAnd(Loaded, Inc, "new");
-  case AtomicRMWInst::Nand:
-    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
-  case AtomicRMWInst::Or:
-    return Builder.CreateOr(Loaded, Inc, "new");
-  case AtomicRMWInst::Xor:
-    return Builder.CreateXor(Loaded, Inc, "new");
-  case AtomicRMWInst::Max:
-    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
-    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
-  case AtomicRMWInst::Min:
-    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
-    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
-  case AtomicRMWInst::UMax:
-    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
-    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
-  case AtomicRMWInst::UMin:
-    NewVal = Builder.CreateICmpULE(Loaded, Inc);
-    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
-  case AtomicRMWInst::FAdd:
-    return Builder.CreateFAdd(Loaded, Inc, "new");
-  case AtomicRMWInst::FSub:
-    return Builder.CreateFSub(Loaded, Inc, "new");
-  default:
-    llvm_unreachable("Unknown atomic op");
-  }
-}
-
 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
   LLVMContext &Ctx = AI->getModule()->getContext();
   TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
@@ -599,8 +558,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
                               TargetLoweringBase::AtomicExpansionKind::LLSC);
     } else {
       auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
-        return performAtomicOp(AI->getOperation(), Builder, Loaded,
-                               AI->getValOperand());
+        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+                                   AI->getValOperand());
       };
       expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                            AI->getAlign(), AI->getOrdering(), PerformOp);
@@ -810,7 +769,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
   case AtomicRMWInst::Sub:
   case AtomicRMWInst::Nand: {
     // The other arithmetic ops need to be masked into place.
-    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
     Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
     Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
     Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
@@ -824,7 +783,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
     // truncate down to the original size, and expand out again after
     // doing the operation.
     Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
-    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
     Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
     return FinalVal;
   }
@@ -1558,8 +1517,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
       Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
       AI->getOrdering(), AI->getSyncScopeID(),
       [&](IRBuilder<> &Builder, Value *Loaded) {
-        return performAtomicOp(AI->getOperation(), Builder, Loaded,
-                               AI->getValOperand());
+        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+                                   AI->getValOperand());
       },
       CreateCmpXchg);
 
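The hunks above show the shape all three AtomicExpand call sites now share: the helper is threaded through as the PerformOp callback of an expansion driver. A sketch of that callback contract (makePerformOp and PerformOpTy are illustrative names; the lambda body mirrors the patch):

  #include <functional>

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/LowerAtomic.h"

  using namespace llvm;

  // The expansion drivers accept a callback that, given the value currently
  // loaded from memory, returns the value to try to store back.
  using PerformOpTy = std::function<Value *(IRBuilder<> &, Value *)>;

  static PerformOpTy makePerformOp(AtomicRMWInst *AI) {
    return [AI](IRBuilder<> &Builder, Value *Loaded) -> Value * {
      return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                 AI->getValOperand());
    };
  }
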

diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index 5963b47923032..8641581c80392 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -39,60 +39,53 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
   return true;
 }
 
-bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
-  IRBuilder<> Builder(RMWI);
-  Value *Ptr = RMWI->getPointerOperand();
-  Value *Val = RMWI->getValOperand();
-
-  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
-  Value *Res = nullptr;
-
-  switch (RMWI->getOperation()) {
-  default: llvm_unreachable("Unexpected RMW operation");
+Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
+                                 IRBuilderBase &Builder, Value *Loaded,
+                                 Value *Inc) {
+  Value *NewVal;
+  switch (Op) {
   case AtomicRMWInst::Xchg:
-    Res = Val;
-    break;
+    return Inc;
   case AtomicRMWInst::Add:
-    Res = Builder.CreateAdd(Orig, Val);
-    break;
+    return Builder.CreateAdd(Loaded, Inc, "new");
   case AtomicRMWInst::Sub:
-    Res = Builder.CreateSub(Orig, Val);
-    break;
+    return Builder.CreateSub(Loaded, Inc, "new");
   case AtomicRMWInst::And:
-    Res = Builder.CreateAnd(Orig, Val);
-    break;
+    return Builder.CreateAnd(Loaded, Inc, "new");
   case AtomicRMWInst::Nand:
-    Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
-    break;
+    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
   case AtomicRMWInst::Or:
-    Res = Builder.CreateOr(Orig, Val);
-    break;
+    return Builder.CreateOr(Loaded, Inc, "new");
   case AtomicRMWInst::Xor:
-    Res = Builder.CreateXor(Orig, Val);
-    break;
+    return Builder.CreateXor(Loaded, Inc, "new");
   case AtomicRMWInst::Max:
-    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
-                               Val, Orig);
-    break;
+    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
   case AtomicRMWInst::Min:
-    Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
-                               Orig, Val);
-    break;
+    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
   case AtomicRMWInst::UMax:
-    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
-                               Val, Orig);
-    break;
+    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
   case AtomicRMWInst::UMin:
-    Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
-                               Orig, Val);
-    break;
+    NewVal = Builder.CreateICmpULE(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
   case AtomicRMWInst::FAdd:
-    Res = Builder.CreateFAdd(Orig, Val);
-    break;
+    return Builder.CreateFAdd(Loaded, Inc, "new");
   case AtomicRMWInst::FSub:
-    Res = Builder.CreateFSub(Orig, Val);
-    break;
+    return Builder.CreateFSub(Loaded, Inc, "new");
+  default:
+    llvm_unreachable("Unknown atomic op");
   }
+}
+
+bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+  IRBuilder<> Builder(RMWI);
+  Value *Ptr = RMWI->getPointerOperand();
+  Value *Val = RMWI->getValOperand();
+
+  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+  Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val);
   Builder.CreateStore(Res, Ptr);
   RMWI->replaceAllUsesWith(Orig);
   RMWI->eraseFromParent();

diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index ff4ccfc60ad19..350915097ec52 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -196,8 +196,8 @@ define i32 @atomicrmw_xchg_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_add_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 4
-; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP1]], 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_add_private_i32:
@@ -217,8 +217,8 @@ define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_sub_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 4
-; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = sub i32 [[TMP1]], 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_sub_private_i32:
@@ -238,8 +238,8 @@ define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_and_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_and_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 4
-; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = and i32 [[TMP1]], 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_and_private_i32:
@@ -260,8 +260,8 @@ define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_nand_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
 ; IR-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 4
-; IR-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], -1
-; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_nand_private_i32:
@@ -282,8 +282,8 @@ define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_or_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], 4
-; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = or i32 [[TMP1]], 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_or_private_i32:
@@ -303,8 +303,8 @@ define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_xor_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], 4
-; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP1]], 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_xor_private_i32:
@@ -324,9 +324,9 @@ define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_max_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 4
-; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 4, i32 [[TMP1]]
-; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
+; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_max_private_i32:
@@ -346,9 +346,9 @@ define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_min_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 4
-; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
-; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
+; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_min_private_i32:
@@ -368,9 +368,9 @@ define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_umax_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 4
-; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 4, i32 [[TMP1]]
-; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
+; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_umax_private_i32:
@@ -390,9 +390,9 @@ define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) {
 define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_umin_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 4
-; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
-; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
+; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret i32 [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_umin_private_i32:
@@ -412,8 +412,8 @@ define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) {
 define float @atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) {
 ; IR-LABEL: @atomicrmw_fadd_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 2.000000e+00
-; IR-NEXT:    store float [[TMP2]], float addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
+; IR-NEXT:    store float [[NEW]], float addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret float [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_fadd_private_i32:
@@ -433,8 +433,8 @@ define float @atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) {
 define float @atomicrmw_fsub_private_i32(float addrspace(5)* %ptr, float %val) {
 ; IR-LABEL: @atomicrmw_fsub_private_i32(
 ; IR-NEXT:    [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
-; IR-NEXT:    [[TMP2:%.*]] = fsub float [[TMP1]], [[VAL:%.*]]
-; IR-NEXT:    store float [[TMP2]], float addrspace(5)* [[PTR]], align 4
+; IR-NEXT:    [[NEW:%.*]] = fsub float [[TMP1]], [[VAL:%.*]]
+; IR-NEXT:    store float [[NEW]], float addrspace(5)* [[PTR]], align 4
 ; IR-NEXT:    ret float [[TMP1]]
 ;
 ; GCN-LABEL: atomicrmw_fsub_private_i32:
@@ -461,8 +461,8 @@ define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(i32 addr
 ; IR-NEXT:    store i32 1, i32 addrspace(5)* [[GEP2]], align 4
 ; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]]
 ; IR-NEXT:    [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4
-; IR-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 7
-; IR-NEXT:    store i32 [[TMP1]], i32 addrspace(5)* [[GEP3]], align 4
+; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP0]], 7
+; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[GEP3]], align 4
 ; IR-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
 ; IR-NEXT:    ret void
 ;

diff --git a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
index a041dcbdca05c..07694a91d3617 100644
--- a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
+++ b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
@@ -11,8 +11,8 @@ define double @kernel(double addrspace(5)* %ptr, double %val) {
   %res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
   ret double %res
 ; CHECK:   %1 = load double, double addrspace(5)* %ptr, align 8
-; CHECK-NEXT:   %2 = fadd double %1, %val
-; CHECK-NEXT:   store double %2, double addrspace(5)* %ptr, align 8
+; CHECK-NEXT:   %new = fadd double %1, %val
+; CHECK-NEXT:   store double %new, double addrspace(5)* %ptr, align 8
 ; CHECK-NEXT:   ret double %1
 }
 
