[llvm] 98f4713 - [AMDGPU] Split entry basic block after alloca instructions.

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 31 22:09:46 PDT 2021


Author: hsmahesha
Date: 2021-09-01T10:18:44+05:30
New Revision: 98f47131228cc73b5308e3cc6fd70375808594e3

URL: https://github.com/llvm/llvm-project/commit/98f47131228cc73b5308e3cc6fd70375808594e3
DIFF: https://github.com/llvm/llvm-project/commit/98f47131228cc73b5308e3cc6fd70375808594e3.diff

LOG: [AMDGPU] Split entry basic block after alloca instructions.

While initializing the LDS pointers within entry basic block of kernel(s), make
sure that the entry basic block is split after alloca instructions.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D108971

Added: 
    llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-alloca.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index dabb4d006d994..39e6afd0b230c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -191,10 +191,19 @@ class ReplaceLDSUseImpl {
     if (!BasicBlockEntry.second)
       return BasicBlockEntry.first->second;
 
-    // Split entry basic block of kernel K.
-    auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt()));
-    IRBuilder<> Builder(EI);
-
+    // Split entry basic block of kernel K just after alloca.
+    //
+    // Find the split point just after alloca.
+    auto &EBB = K->getEntryBlock();
+    auto *EI = &(*(EBB.getFirstInsertionPt()));
+    BasicBlock::reverse_iterator RIT(EBB.getTerminator());
+    while (!isa<AllocaInst>(*RIT) && (&*RIT != EI))
+      ++RIT;
+    if (isa<AllocaInst>(*RIT))
+      --RIT;
+
+    // Split entry basic block.
+    IRBuilder<> Builder(&*RIT);
     Value *Mbcnt =
         Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
                                 {Builder.getInt32(-1), Builder.getInt32(0)});

diff  --git a/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-alloca.ll b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-alloca.ll
new file mode 100644
index 0000000000000..80122cf0833a7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-split-entry-bb-after-alloca.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -mtriple=amdgcn--  -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
+
+; DESCRIPTION:
+;
+; There is one lds global defined here, and this lds is used within a single non-kernel
+; function, as an operand of nested constant expression, and this non-kernel function is
+; reachable from kernel. Hence nested constant expression should be converted into a
+; series of instructions and pointer replacement should take place.
+;
+; Further, the entry basic block of the kernel @k0 contains an alloca instruction. Hence the
+; entry basic block splitting for pointer initialization should happen after the alloca.
+;
+
+; Original LDS should exist.
+; CHECK: @used_only_within_func = addrspace(3) global [4 x i32] undef, align 4
+@used_only_within_func = addrspace(3) global [4 x i32] undef, align 4
+
+; Pointers should be created.
+; CHECK: @used_only_within_func.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
+
+; Pointer replacement code should be added.
+define void @f0(i32 %x) {
+; CHECK-LABEL: entry:
+; CHECK:   %0 = load i16, i16 addrspace(3)* @used_only_within_func.ptr, align 2
+; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
+; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
+; CHECK:   %3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 0
+; CHECK:   %4 = addrspacecast i32 addrspace(3)* %3 to i32*
+; CHECK:   %5 = ptrtoint i32* %4 to i64
+; CHECK:   %6 = add i64 %5, %5
+; CHECK:   %7 = inttoptr i64 %6 to i32*
+; CHECK:   store i32 %x, i32* %7, align 4
+; CHECK:   ret void
+entry:
+  store i32 %x, i32* inttoptr (i64 add (i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast ([4 x i32] addrspace(3)* @used_only_within_func to i32 addrspace(3)*) to i32*) to i64), i64 ptrtoint (i32* addrspacecast (i32 addrspace(3)* bitcast ([4 x i32] addrspace(3)* @used_only_within_func to i32 addrspace(3)*) to i32*) to i64)) to i32*), align 4
+  ret void
+}
+
+; Pointer initialization code should be added.
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: entry:
+; CHECK:   %0 = alloca i64, align 8, addrspace(5)
+; CHECK:   %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK:   %2 = icmp eq i32 %1, 0
+; CHECK:   br i1 %2, label %3, label %4
+;
+; CHECK-LABEL: 3:
+; CHECK:   store i16 ptrtoint ([4 x i32] addrspace(3)* @used_only_within_func to i16), i16 addrspace(3)* @used_only_within_func.ptr, align 2
+; CHECK:   br label %4
+
+; CHECK-LABEL: 4:
+; CHECK:   call void @llvm.amdgcn.wave.barrier()
+; CHECK:   %5 = addrspacecast i64 addrspace(5)* %0 to i64*
+; CHECK:   call void @f0(i32 0)
+; CHECK:   ret void
+entry:
+  %0 = alloca i64, align 8, addrspace(5)
+  %1 = addrspacecast i64 addrspace(5)* %0 to i64*
+  call void @f0(i32 0)
+  ret void
+}


        


More information about the llvm-commits mailing list