[llvm] pr/amdgpu closed world (PR #66488)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 20:19:51 PDT 2024


shiltian wrote:

> Hi @jdoerfert, thanks for working on this! I tried this patch on the following indirect call test but it seems to fail on AMDGPU, in spite of passing on X86:
> 
> ```
> ; RUN: opt -S -mtriple=x86_64-unknown-linux-gnu -passes=attributor -attributor-assume-closed-world -o - %s | FileCheck %s --check-prefix=GENERIC
> ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world -o - %s | FileCheck %s --check-prefix=AMDGPU
> 
> ; Function Attrs: mustprogress nounwind uwtable
> define dso_local noundef i32 @_Z3foov() #0 {
> ; GENERIC-LABEL: define dso_local noundef i32 @_Z3foov(
> ; GENERIC-SAME: ) #[[ATTR0:[0-9]+]] {
> ; GENERIC-NEXT:    ret i32 1
> ;
> ; AMDGPU-LABEL: define dso_local noundef i32 @_Z3foov(
> ; AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] {
> ; AMDGPU-NEXT:    ret i32 1
> ;
>   ret i32 1
> }
> 
> ; Function Attrs: mustprogress nounwind uwtable
> define dso_local noundef i32 @_Z3barv() #0 {
> ; GENERIC-LABEL: define dso_local noundef i32 @_Z3barv(
> ; GENERIC-SAME: ) #[[ATTR0]] {
> ; GENERIC-NEXT:    ret i32 2
> ;
> ; AMDGPU-LABEL: define dso_local noundef i32 @_Z3barv(
> ; AMDGPU-SAME: ) #[[ATTR0]] {
> ; AMDGPU-NEXT:    ret i32 2
> ;
>   ret i32 2
> }
> 
> ; Function Attrs: mustprogress norecurse uwtable
> define amdgpu_kernel void @main(i32 noundef %0, ptr %out) #1 {
> ; GENERIC-LABEL: define amdgpu_kernel void @main(
> ; GENERIC:    [[TMP11:%.*]] = call noundef i32 @_Z3barv()
> ; GENERIC:    [[TMP14:%.*]] = call noundef i32 @_Z3foov()
> ;
> ; AMDGPU-LABEL: define amdgpu_kernel void @main(
> ; AMDGPU:    [[TMP11:%.*]] = call noundef i32 @_Z3barv()
> ; AMDGPU:    [[TMP14:%.*]] = call noundef i32 @_Z3foov()
> ;
>   %2 = alloca i32, align 4
>   %3 = alloca i32, align 4
>   %4 = alloca ptr, align 8
>   store i32 0, ptr %2, align 4
>   store i32 %0, ptr %3, align 4
>   call void @llvm.lifetime.start.p0(i64 8, ptr %4) #3
>   store ptr @_Z3foov, ptr %4, align 8
>   %5 = load i32, ptr %3, align 4
>   %6 = icmp ne i32 %5, 0
>   br i1 %6, label %7, label %8
> 
> 7:                                                ; preds = %1
>   store ptr @_Z3barv, ptr %4, align 8
>   br label %8
> 
> 8:                                                ; preds = %7, %1
>   %9 = load ptr, ptr %4, align 8
>   %10 = call noundef i32 %9()
>   store i32 %10, ptr %out, align 4
>   call void @llvm.lifetime.end.p0(i64 8, ptr %4) #3
>   ret void
> }
> 
> ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
> declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
> 
> ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
> declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
> 
> attributes #0 = { mustprogress nounwind uwtable }
> attributes #1 = { mustprogress norecurse uwtable }
> attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
> attributes #3 = { nounwind }
> ```

The indirect call in `@main` can't be specialized on AMDGPU because the function pointer is not uniform.

https://github.com/llvm/llvm-project/pull/66488


More information about the llvm-commits mailing list