[llvm] r293587 - InferAddressSpaces: Support memory intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 30 17:56:57 PST 2017
Author: arsenm
Date: Mon Jan 30 19:56:57 2017
New Revision: 293587
URL: http://llvm.org/viewvc/llvm-project?rev=293587&view=rev
Log:
InferAddressSpaces: Support memory intrinsics
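Handle memset, memcpy, and memmove when a pointer operand is an addrspacecast
from a more specific address space, rewriting the call to a re-mangled
intrinsic on the original pointer. Volatile memory intrinsics are left alone.
llvm.objectsize and the amdgcn atomic inc/dec intrinsics are rewritten the
same way. A minimal sketch of the memcpy case, distilled from the tests below
(value names are illustrative; metadata omitted):

  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
  call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)

becomes

  call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false)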
Added:
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
Modified: llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp?rev=293587&r1=293586&r2=293587&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp Mon Jan 30 19:56:57 2017
@@ -153,7 +153,15 @@ private:
Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
DenseSet<Value *> *Visited) const;
+ bool rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+ void collectRewritableIntrinsicOperands(
+ IntrinsicInst *II,
+ std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const;
+
std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+
Value *cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -210,6 +218,47 @@ static SmallVector<Value *, 2> getPointe
}
}
+// TODO: Move logic to TTI?
+bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
+ Module *M = II->getParent()->getParent()->getParent();
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::objectsize:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec: {
+ Type *DestTy = II->getType();
+ Type *SrcTy = NewV->getType();
+ Function *NewDecl
+ = Intrinsic::getDeclaration(M, II->getIntrinsicID(), { DestTy, SrcTy });
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+// TODO: Move logic to TTI?
+void InferAddressSpaces::collectRewritableIntrinsicOperands(
+ IntrinsicInst *II,
+ std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::objectsize:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ appendsFlatAddressExpressionToPostorderStack(
+ II->getArgOperand(0), PostorderStack, Visited);
+ break;
+ default:
+ break;
+ }
+}
+
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
@@ -224,7 +273,7 @@ void InferAddressSpaces::appendsFlatAddr
}
// Returns all flat address expressions in function F. The elements are
-// in postorder.
+// ordered in postorder.
std::vector<Value *>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
@@ -249,8 +298,15 @@ InferAddressSpaces::collectFlatAddressEx
PushPtrOperand(RMW->getPointerOperand());
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
PushPtrOperand(CmpX->getPointerOperand());
-
- // TODO: Support intrinsics
+ else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
+ // For memset/memcpy/memmove, any pointer operand can be replaced.
+ PushPtrOperand(MI->getRawDest());
+
+ // Handle 2nd operand for memcpy/memmove.
+ if (auto *MTI = dyn_cast<MemTransferInst>(MI))
+ PushPtrOperand(MTI->getRawSource());
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
}
std::vector<Value *> Postorder; // The resultant postorder.
@@ -560,6 +616,63 @@ static bool isSimplePointerUseValidToRep
return false;
}
+/// Update memory intrinsic uses that require more complex processing than
+/// simple memory instructions. These require re-mangling and may have multiple
+/// pointer operands.
+static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI,
+ Value *OldV, Value *NewV) {
+ IRBuilder<> B(MI);
+ MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
+ MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
+
+ if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
+ B.CreateMemSet(NewV, MSI->getValue(),
+ MSI->getLength(), MSI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+ Value *Src = MTI->getRawSource();
+ Value *Dest = MTI->getRawDest();
+
+ // Be careful in case this is a self-to-self copy.
+ if (Src == OldV)
+ Src = NewV;
+
+ if (Dest == OldV)
+ Dest = NewV;
+
+ if (isa<MemCpyInst>(MTI)) {
+ MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
+ B.CreateMemCpy(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, TBAAStruct, ScopeMD, NoAliasMD);
+ } else {
+ assert(isa<MemMoveInst>(MTI));
+ B.CreateMemMove(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ }
+ } else
+ llvm_unreachable("unhandled MemIntrinsic");
+
+ MI->eraseFromParent();
+ return true;
+}
+
+static Value::use_iterator skipToNextUser(Value::use_iterator I,
+ Value::use_iterator End) {
+ User *CurUser = I->getUser();
+ ++I;
+
+ while (I != End && I->getUser() == CurUser)
+ ++I;
+
+ return I;
+}
+
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
const std::vector<Value *> &Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
@@ -595,20 +708,38 @@ bool InferAddressSpaces::rewriteWithNewA
if (NewV == nullptr)
continue;
- SmallVector<Use *, 4> Uses;
- for (Use &U : V->uses())
- Uses.push_back(&U);
-
DEBUG(dbgs() << "Replacing the uses of " << *V
<< "\n with\n " << *NewV << '\n');
- for (Use *U : Uses) {
- if (isSimplePointerUseValidToReplace(*U)) {
+ Value::use_iterator I, E, Next;
+ for (I = V->use_begin(), E = V->use_end(); I != E; ) {
+ Use &U = *I;
+
+ // Some users may see the same pointer operand in multiple operands. Skip
+ // to the next instruction.
+ I = skipToNextUser(I, E);
+
+ if (isSimplePointerUseValidToReplace(U)) {
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
- U->set(NewV);
- } else if (isa<Instruction>(U->getUser())) {
+ U.set(NewV);
+ continue;
+ }
+
+ User *CurUser = U.getUser();
+ // Handle more complex cases like intrinsics that need to be remangled.
+ if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
+ if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
+ continue;
+ }
+
+ if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
+ if (rewriteIntrinsicOperands(II, V, NewV))
+ continue;
+ }
+
+ if (isa<Instruction>(CurUser)) {
// Otherwise, replaces the use with flat(NewV).
// TODO: Some optimization opportunities are missed. For example, in
// %0 = icmp eq float* %p, %q
@@ -622,13 +753,14 @@ bool InferAddressSpaces::rewriteWithNewA
BasicBlock::iterator InsertPos = std::next(I->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
- U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
+ U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
} else {
- U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
- V->getType()));
+ U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ V->getType()));
}
}
}
+
if (V->use_empty())
RecursivelyDeleteTriviallyDeadInstructions(V);
}
Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll?rev=293587&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll Mon Jan 30 19:56:57 2017
@@ -0,0 +1,92 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; CHECK-LABEL: @objectsize_group_to_flat_i32(
+; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)
+define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+ %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @objectsize_global_to_flat_i64(
+; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true)
+define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
+ %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true)
+ ret i64 %val
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y)
+define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+ ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y)
+define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+ ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val)
+define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+ ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val)
+define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+ ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+ ret i64 %ret
+}
+
+declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1
+declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind argmemonly }
Added: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll?rev=293587&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll Mon Jan 30 19:56:57 2017
@@ -0,0 +1,134 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; CHECK-LABEL: @memset_group_to_flat(
+; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memset_global_to_flat(
+; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memset_group_to_flat_no_md(
+; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
+define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: @memset_global_to_flat_no_md(
+; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
+define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
+; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
+; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
+; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
+; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
+ %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7
+define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7
+ ret void
+}
+
+; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
+define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
+ ret void
+}
+
+; Check for iterator problems if the pointer has 2 uses in the same call
+; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
+; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+ call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
+; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
+ call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ ret void
+}
+
+declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
+declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
+declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"A", !2}
+!2 = !{!"tbaa root"}
+!3 = !{!"B", !2}
+!4 = !{!5}
+!5 = distinct !{!5, !6, !"some scope"}
+!6 = distinct !{!6, !"some domain"}
+!7 = !{i64 0, i64 8, null}
\ No newline at end of file
Modified: llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll?rev=293587&r1=293586&r2=293587&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll (original)
+++ llvm/trunk/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll Mon Jan 30 19:56:57 2017
@@ -115,4 +115,26 @@ define { i32, i1 } @volatile_cmpxchg_gro
ret { i32, i1 } %ret
}
-attributes #0 = { nounwind }
\ No newline at end of file
+; FIXME: Shouldn't be losing names
+; CHECK-LABEL: @volatile_memset_group_to_flat(
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
+define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
+ ret void
+}
+
+; CHECK-LABEL: @volatile_memset_global_to_flat(
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
+define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
+ call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
+ ret void
+}
+
+declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }