[llvm] r293587 - InferAddressSpaces: Support memory intrinsics

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 30 17:56:57 PST 2017


Author: arsenm
Date: Mon Jan 30 19:56:57 2017
New Revision: 293587

URL: http://llvm.org/viewvc/llvm-project?rev=293587&view=rev
Log:
InferAddressSpaces: Support memory intrinsics

Added:
    llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
    llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
    llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll

Modified: llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp?rev=293587&r1=293586&r2=293587&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp Mon Jan 30 19:56:57 2017
@@ -153,7 +153,15 @@ private:
     Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
     DenseSet<Value *> *Visited) const;
 
+  bool rewriteIntrinsicOperands(IntrinsicInst *II,
+                                Value *OldV, Value *NewV) const;
+  void collectRewritableIntrinsicOperands(
+    IntrinsicInst *II,
+    std::vector<std::pair<Value *, bool>> *PostorderStack,
+    DenseSet<Value *> *Visited) const;
+
   std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+
   Value *cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -210,6 +218,47 @@ static SmallVector<Value *, 2> getPointe
   }
 }
 
+// TODO: Move logic to TTI?
+bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
+                                                  Value *OldV,
+                                                  Value *NewV) const {
+  Module *M = II->getParent()->getParent()->getParent();
+
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::objectsize:
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec: {
+    Type *DestTy = II->getType();
+    Type *SrcTy = NewV->getType();
+    Function *NewDecl
+      = Intrinsic::getDeclaration(M, II->getIntrinsicID(), { DestTy, SrcTy });
+    II->setArgOperand(0, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+// TODO: Move logic to TTI?
+void InferAddressSpaces::collectRewritableIntrinsicOperands(
+  IntrinsicInst *II,
+  std::vector<std::pair<Value *, bool>> *PostorderStack,
+  DenseSet<Value *> *Visited) const {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::objectsize:
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec:
+    appendsFlatAddressExpressionToPostorderStack(
+      II->getArgOperand(0), PostorderStack, Visited);
+    break;
+  default:
+    break;
+  }
+}
+
+// Returns all flat address expressions in function F. The elements are
 // If V is an unvisited flat address expression, appends V to PostorderStack
 // and marks it as visited.
 void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
@@ -224,7 +273,7 @@ void InferAddressSpaces::appendsFlatAddr
 }
 
 // Returns all flat address expressions in function F. The elements are ordered
-// in postorder.
+// ordered in postorder.
 std::vector<Value *>
 InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
@@ -249,8 +298,15 @@ InferAddressSpaces::collectFlatAddressEx
       PushPtrOperand(RMW->getPointerOperand());
     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
       PushPtrOperand(CmpX->getPointerOperand());
-
-    // TODO: Support intrinsics
+    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
+      // For memset/memcpy/memmove, any pointer operand can be replaced.
+      PushPtrOperand(MI->getRawDest());
+
+      // Handle 2nd operand for memcpy/memmove.
+      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
+       PushPtrOperand(MTI->getRawSource());
+    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
   }
 
   std::vector<Value *> Postorder; // The resultant postorder.
@@ -560,6 +616,63 @@ static bool isSimplePointerUseValidToRep
   return false;
 }
 
+/// Update memory intrinsic uses that require more complex processing than
+/// simple memory instructions. Thse require re-mangling and may have multiple
+/// pointer operands.
+static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI,
+                                     Value *OldV, Value *NewV) {
+  IRBuilder<> B(MI);
+  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
+  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
+  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
+
+  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
+    B.CreateMemSet(NewV, MSI->getValue(),
+                   MSI->getLength(), MSI->getAlignment(),
+                   false, // isVolatile
+                   TBAA, ScopeMD, NoAliasMD);
+  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+    Value *Src = MTI->getRawSource();
+    Value *Dest = MTI->getRawDest();
+
+    // Be careful in case this is a self-to-self copy.
+    if (Src == OldV)
+      Src = NewV;
+
+    if (Dest == OldV)
+      Dest = NewV;
+
+    if (isa<MemCpyInst>(MTI)) {
+      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
+      B.CreateMemCpy(Dest, Src, MTI->getLength(),
+                     MTI->getAlignment(),
+                     false, // isVolatile
+                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
+    } else {
+      assert(isa<MemMoveInst>(MTI));
+      B.CreateMemMove(Dest, Src, MTI->getLength(),
+                      MTI->getAlignment(),
+                      false, // isVolatile
+                      TBAA, ScopeMD, NoAliasMD);
+    }
+  } else
+    llvm_unreachable("unhandled MemIntrinsic");
+
+  MI->eraseFromParent();
+  return true;
+}
+
+static Value::use_iterator skipToNextUser(Value::use_iterator I,
+                                          Value::use_iterator End) {
+  User *CurUser = I->getUser();
+  ++I;
+
+  while (I != End && I->getUser() == CurUser)
+    ++I;
+
+  return I;
+}
+
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
   const std::vector<Value *> &Postorder,
   const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
@@ -595,20 +708,38 @@ bool InferAddressSpaces::rewriteWithNewA
     if (NewV == nullptr)
       continue;
 
-    SmallVector<Use *, 4> Uses;
-    for (Use &U : V->uses())
-      Uses.push_back(&U);
-
     DEBUG(dbgs() << "Replacing the uses of " << *V
                  << "\n  with\n  " << *NewV << '\n');
 
-    for (Use *U : Uses) {
-      if (isSimplePointerUseValidToReplace(*U)) {
+    Value::use_iterator I, E, Next;
+    for (I = V->use_begin(), E = V->use_end(); I != E; ) {
+      Use &U = *I;
+
+      // Some users may see the same pointer operand in multiple operands. Skip
+      // to the next instruction.
+      I = skipToNextUser(I, E);
+
+      if (isSimplePointerUseValidToReplace(U)) {
         // If V is used as the pointer operand of a compatible memory operation,
         // sets the pointer operand to NewV. This replacement does not change
         // the element type, so the resultant load/store is still valid.
-        U->set(NewV);
-      } else if (isa<Instruction>(U->getUser())) {
+        U.set(NewV);
+        continue;
+      }
+
+      User *CurUser = U.getUser();
+      // Handle more complex cases like intrinsic that need to be remangled.
+      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
+        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
+          continue;
+      }
+
+      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
+        if (rewriteIntrinsicOperands(II, V, NewV))
+          continue;
+      }
+
+      if (isa<Instruction>(CurUser)) {
         // Otherwise, replaces the use with flat(NewV).
         // TODO: Some optimization opportunities are missed. For example, in
         //   %0 = icmp eq float* %p, %q
@@ -622,13 +753,14 @@ bool InferAddressSpaces::rewriteWithNewA
           BasicBlock::iterator InsertPos = std::next(I->getIterator());
           while (isa<PHINode>(InsertPos))
             ++InsertPos;
-          U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
+          U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
         } else {
-          U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
-                                                V->getType()));
+          U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+                                               V->getType()));
         }
       }
     }
+
     if (V->use_empty())
       RecursivelyDeleteTriviallyDeadInstructions(V);
   }

Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll?rev=293587&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll Mon Jan 30 19:56:57 2017
@@ -0,0 +1,92 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; CHECK-LABEL: @objectsize_group_to_flat_i32(
+; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)
+define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)
+  ret i32 %val
+}
+
+; CHECK-LABEL: @objectsize_global_to_flat_i64(
+; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true)
+define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
+  %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true)
+  ret i64 %val
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y)
+define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y)
+define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val)
+define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val)
+define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1
+declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind argmemonly }

Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll?rev=293587&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll Mon Jan 30 19:56:57 2017
@@ -0,0 +1,134 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; CHECK-LABEL: @memset_group_to_flat(
+; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memset_global_to_flat(
+; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memset_group_to_flat_no_md(
+; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
+define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @memset_global_to_flat_no_md(
+; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
+define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
+  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
+; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
+; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
+  %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
+; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
+; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
+  %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
+; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
+  %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7
+define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+  ret void
+}
+
+; Check for iterator problems if the pointer has 2 uses in the same call
+; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
+; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
+; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+  call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
+declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"A", !2}
+!2 = !{!"tbaa root"}
+!3 = !{!"B", !2}
+!4 = !{!5}
+!5 = distinct !{!5, !6, !"some scope"}
+!6 = distinct !{!6, !"some domain"}
+!7 = !{i64 0, i64 8, null}
\ No newline at end of file

Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll?rev=293587&r1=293586&r2=293587&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll Mon Jan 30 19:56:57 2017
@@ -115,4 +115,26 @@ define { i32, i1 } @volatile_cmpxchg_gro
   ret { i32, i1 } %ret
 }
 
-attributes #0 = { nounwind }
\ No newline at end of file
+; FIXME: Shouldn't be losing names
+; CHECK-LABEL: @volatile_memset_group_to_flat(
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
+define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
+  ret void
+}
+
+; CHECK-LABEL: @volatile_memset_global_to_flat(
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
+define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+  call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
+  ret void
+}
+
+declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }




More information about the llvm-commits mailing list