[llvm] pr/amdgpu closed world (PR #66488)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 20:19:51 PDT 2024
shiltian wrote:
> Hi @jdoerfert, thanks for working on this! I tried this patch on the following indirect call test but it seems to fail on AMDGPU, in spite of passing on X86:
>
> ```
> ; RUN: opt -S -mtriple=x86_64-unknown-linux-gnu -passes=attributor -attributor-assume-closed-world -o - %s | FileCheck %s --check-prefix=GENERIC
> ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world -o - %s | FileCheck %s --check-prefix=AMDGPU
>
> ; Function Attrs: mustprogress nounwind uwtable
> define dso_local noundef i32 @_Z3foov() #0 {
> ; GENERIC-LABEL: define dso_local noundef i32 @_Z3foov(
> ; GENERIC-SAME: ) #[[ATTR0:[0-9]+]] {
> ; GENERIC-NEXT: ret i32 1
> ;
> ; AMDGPU-LABEL: define dso_local noundef i32 @_Z3foov(
> ; AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] {
> ; AMDGPU-NEXT: ret i32 1
> ;
> ret i32 1
> }
>
> ; Function Attrs: mustprogress nounwind uwtable
> define dso_local noundef i32 @_Z3barv() #0 {
> ; GENERIC-LABEL: define dso_local noundef i32 @_Z3barv(
> ; GENERIC-SAME: ) #[[ATTR0]] {
> ; GENERIC-NEXT: ret i32 2
> ;
> ; AMDGPU-LABEL: define dso_local noundef i32 @_Z3barv(
> ; AMDGPU-SAME: ) #[[ATTR0]] {
> ; AMDGPU-NEXT: ret i32 2
> ;
> ret i32 2
> }
>
> ; Function Attrs: mustprogress norecurse uwtable
> define amdgpu_kernel void @main(i32 noundef %0, ptr %out) #1 {
> ; GENERIC-LABEL: define amdgpu_kernel void @main(
> ; GENERIC: [[TMP11:%.*]] = call noundef i32 @_Z3barv()
> ; GENERIC: [[TMP14:%.*]] = call noundef i32 @_Z3foov()
> ;
> ; AMDGPU-LABEL: define amdgpu_kernel void @main(
> ; AMDGPU: [[TMP11:%.*]] = call noundef i32 @_Z3barv()
> ; AMDGPU: [[TMP14:%.*]] = call noundef i32 @_Z3foov()
> ;
> %2 = alloca i32, align 4
> %3 = alloca i32, align 4
> %4 = alloca ptr, align 8
> store i32 0, ptr %2, align 4
> store i32 %0, ptr %3, align 4
> call void @llvm.lifetime.start.p0(i64 8, ptr %4) #3
> store ptr @_Z3foov, ptr %4, align 8
> %5 = load i32, ptr %3, align 4
> %6 = icmp ne i32 %5, 0
> br i1 %6, label %7, label %8
>
> 7: ; preds = %1
> store ptr @_Z3barv, ptr %4, align 8
> br label %8
>
> 8: ; preds = %7, %1
> %9 = load ptr, ptr %4, align 8
> %10 = call noundef i32 %9()
> store i32 %10, ptr %out, align 4
> call void @llvm.lifetime.end.p0(i64 8, ptr %4) #3
> ret void
> }
>
> ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
> declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
>
> ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
> declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
>
> attributes #0 = { mustprogress nounwind uwtable }
> attributes #1 = { mustprogress norecurse uwtable }
> attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
> attributes #3 = { nounwind }
> ```
The indirect call in `@main` can't be specialized on AMDGPU because the function pointer is not uniform.
https://github.com/llvm/llvm-project/pull/66488
More information about the llvm-commits mailing list