[PATCH] D96386: [AMDGPU] Fix promote alloca with double use in a same insn

Tue Feb 9 16:54:32 PST 2021

rampitec created this revision.
rampitec added reviewers: arsenm, yaxunl.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, nhaehnle, jvesely, kzhuravl.
rampitec requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.

If we have an instruction where more than one pointer operand
is derived from the same promoted alloca, we fix it for one
argument but do not fix the second use, because we consider this
user done.

Fix this by reordering the user worklist (a user that is already
present is moved to the end) to make sure a dependent use always
follows its dependency.

Fixes: SWDEV-271358


https://reviews.llvm.org/D96386

Files:
  llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
  llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll


Index: llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -2,6 +2,7 @@
 
 declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
 declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0
 
 declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0
 declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0
@@ -61,5 +62,20 @@
   ret void
 }
 
+; CHECK-LABEL: @promote_alloca_used_twice_in_memcpy(
+; CHECK: %i = bitcast double addrspace(3)* %arrayidx1 to i8 addrspace(3)*
+; CHECK: %i1 = bitcast double addrspace(3)* %arrayidx2 to i8 addrspace(3)*
+; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 8 %i, i8 addrspace(3)* align 8 %i1, i64 16, i1 false)
+define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) {
+entry:
+  %r = alloca double, align 8
+  %arrayidx1 = getelementptr inbounds double, double* %r, i32 1
+  %i = bitcast double* %arrayidx1 to i8*
+  %arrayidx2 = getelementptr inbounds double, double* %r, i32 %c
+  %i1 = bitcast double* %arrayidx2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 dereferenceable(16) %i, i8* align 8 dereferenceable(16) %i1, i64 16, i1 false)
+  ret void
+}
+
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" }
 attributes #1 = { nounwind readnone }
Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -600,8 +600,12 @@
     Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {
 
   for (User *User : Val->users()) {
-    if (is_contained(WorkList, User))
+    auto UI = llvm::find(WorkList, User);
+    if (UI != WorkList.end()) {
+      WorkList.erase(UI);
+      WorkList.push_back(User);
       continue;
+    }
 
     if (CallInst *CI = dyn_cast<CallInst>(User)) {
       if (!isCallPromotable(CI))


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D96386.322545.patch
Type: text/x-patch
Size: 2370 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210210/8c66e070/attachment-0001.bin>