[llvm] [AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (PR #94647)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 6 11:20:30 PDT 2024


================
@@ -0,0 +1,914 @@
+; Test the generation of the attribute amdgpu-no-flat-scratch-init
+; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
+
+;; tests of alloca
+
+; No explicit calling convention, no alloca, no calls: the body is a single
+; volatile store of %arg0 through an undef addrspace(1) pointer.
+; NOTE(review): each `[[NAME:regex]]` in the SAME lines is a fresh FileCheck
+; definition, so reusing a capture name in other functions does not by itself
+; assert that they share one attribute group.
+define void @without_alloca(i1 %arg0) {
+; GFX9-LABEL: define void @without_alloca(i1 %arg0)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @without_alloca(i1 %arg0)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI:[0-9]+]]
+  store volatile i1 %arg0, ptr addrspace(1) undef
+  ret void
+}
+
+; No explicit calling convention; the body allocates an i32 in addrspace(5)
+; and does a volatile store to it. No calls.
+define void @with_alloca() {
+; GFX9-LABEL: define void @with_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca and no calls: a single volatile store of
+; %arg0 through an undef addrspace(1) pointer.
+define amdgpu_kernel void @without_alloca_cc_kernel(i1 %arg0) {
+; GFX9-LABEL: define amdgpu_kernel void @without_alloca_cc_kernel(i1 %arg0)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @without_alloca_cc_kernel(i1 %arg0)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI2:[0-9]+]]
+  store volatile i1 %arg0, ptr addrspace(1) undef
+  ret void
+}
+
+; amdgpu_kernel entry whose body allocates an i32 in addrspace(5) and does a
+; volatile store to it. No calls.
+define amdgpu_kernel void @with_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @with_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; graphics functions won't get the attribute amdgpu-no-flat-scratch-init
+
+; amdgpu_vs entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_vs void @with_alloca_cc_vs() {
+; GFX9-LABEL: define amdgpu_vs void @with_alloca_cc_vs()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_vs void @with_alloca_cc_vs()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_gs entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_gs void @with_alloca_cc_gs() {
+; GFX9-LABEL: define amdgpu_gs void @with_alloca_cc_gs()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_gs void @with_alloca_cc_gs()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_ps entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_ps void @with_alloca_cc_ps() {
+; GFX9-LABEL: define amdgpu_ps void @with_alloca_cc_ps()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_ps void @with_alloca_cc_ps()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_cs entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_cs void @with_alloca_cc_cs() {
+; GFX9-LABEL: define amdgpu_cs void @with_alloca_cc_cs()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_cs void @with_alloca_cc_cs()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_hs entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_hs void @with_alloca_cc_hs() {
+; GFX9-LABEL: define amdgpu_hs void @with_alloca_cc_hs()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_hs void @with_alloca_cc_hs()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_ls entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_ls void @with_alloca_cc_ls() {
+; GFX9-LABEL: define amdgpu_ls void @with_alloca_cc_ls()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_ls void @with_alloca_cc_ls()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_es entry with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS names.
+define amdgpu_es void @with_alloca_cc_es() {
+; GFX9-LABEL: define amdgpu_es void @with_alloca_cc_es()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_es void @with_alloca_cc_es()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_gfx function with an addrspace(5) alloca and a volatile store to it.
+; The attribute group is captured under the *_CC_GRAPHICS2 names, distinct
+; from the names used for the amdgpu_vs/gs/ps/cs/hs/ls/es tests.
+define amdgpu_gfx void @with_alloca_cc_gfx() {
+; GFX9-LABEL: define amdgpu_gfx void @with_alloca_cc_gfx()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_gfx void @with_alloca_cc_gfx()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_cs_chain function with an addrspace(5) alloca and a volatile store
+; to it. The attribute group is captured under the *_CC_GRAPHICS2 names.
+define amdgpu_cs_chain void @with_alloca_cc_cs_chain() {
+; GFX9-LABEL: define amdgpu_cs_chain void @with_alloca_cc_cs_chain()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_cs_chain void @with_alloca_cc_cs_chain()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; amdgpu_cs_chain_preserve function with an addrspace(5) alloca and a
+; volatile store to it. The attribute group is captured under the
+; *_CC_GRAPHICS2 names.
+define amdgpu_cs_chain_preserve void @with_alloca_cc_cs_chain_preserve() {
+; GFX9-LABEL: define amdgpu_cs_chain_preserve void @with_alloca_cc_cs_chain_preserve()
+; GFX9-SAME:  #[[ATTR_GFX9_CC_GRAPHICS2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_cs_chain_preserve void @with_alloca_cc_cs_chain_preserve()
+; GFX10-SAME:  #[[ATTR_GFX10_CC_GRAPHICS2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  ret void
+}
+
+; No alloca of its own; the only instruction besides ret is a call to
+; @without_alloca, passing the i1 argument that callee is defined with.
+define void @call_without_alloca() {
+; GFX9-LABEL: define void @call_without_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @call_without_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI:[0-9]+]]
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca of its own; it only calls
+; @without_alloca, passing the i1 argument that callee is defined with.
+define amdgpu_kernel void @call_without_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @call_without_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @call_without_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI2:[0-9]+]]
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; No alloca of its own; the only call is to @with_alloca, whose body contains
+; an addrspace(5) alloca.
+define void @call_with_alloca() {
+; GFX9-LABEL: define void @call_with_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @call_with_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  call void @with_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca of its own; the only call is to
+; @with_alloca, whose body contains an addrspace(5) alloca.
+define amdgpu_kernel void @call_with_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @call_with_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @call_with_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  call void @with_alloca()
+  ret void
+}
+
+; No alloca of its own; calls both @with_alloca (which contains an
+; addrspace(5) alloca) and @without_alloca. The i1 argument is passed so the
+; call's function type matches the definition of @without_alloca.
+define void @call_both_with_and_without_alloca() {
+; GFX9-LABEL: define void @call_both_with_and_without_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @call_both_with_and_without_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  call void @with_alloca()
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca of its own; calls both @with_alloca
+; (which contains an addrspace(5) alloca) and @without_alloca. The i1
+; argument is passed so the call's function type matches the definition of
+; @without_alloca.
+define amdgpu_kernel void @call_both_with_and_without_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @call_both_with_and_without_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @call_both_with_and_without_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  call void @with_alloca()
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; Two-level call chain with no alloca in this function: it calls
+; @call_without_alloca, which in turn calls @without_alloca.
+define void @call_call_without_alloca() {
+; GFX9-LABEL: define void @call_call_without_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @call_call_without_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI:[0-9]+]]
+  call void @call_without_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca of its own; it calls
+; @call_without_alloca, which in turn calls @without_alloca.
+define amdgpu_kernel void @call_call_without_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @call_call_without_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @call_call_without_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI2:[0-9]+]]
+  call void @call_without_alloca()
+  ret void
+}
+
+; Two-level call chain with no alloca in this function: it calls
+; @call_with_alloca, which in turn calls @with_alloca (the function that
+; contains the addrspace(5) alloca).
+define void @call_call_with_alloca() {
+; GFX9-LABEL: define void @call_call_with_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @call_call_with_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  call void @call_with_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry with no alloca of its own; it calls @call_with_alloca,
+; which in turn calls @with_alloca (the function that contains the
+; addrspace(5) alloca).
+define amdgpu_kernel void @call_call_with_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @call_call_with_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @call_call_with_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  call void @call_with_alloca()
+  ret void
+}
+
+; Has its own addrspace(5) alloca (with a volatile store to it) and also
+; calls @without_alloca. The i1 argument is passed so the call's function
+; type matches the definition of @without_alloca.
+define void @with_alloca_call_without_alloca() {
+; GFX9-LABEL: define void @with_alloca_call_without_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_alloca_call_without_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; amdgpu_kernel entry that has its own addrspace(5) alloca (with a volatile
+; store to it) and also calls @without_alloca. The i1 argument is passed so
+; the call's function type matches the definition of @without_alloca.
+define amdgpu_kernel void @with_alloca_call_without_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @with_alloca_call_without_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_alloca_call_without_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @without_alloca(i1 true)
+  ret void
+}
+
+; Has its own addrspace(5) alloca (with a volatile store to it) and also
+; calls @with_alloca, which contains another addrspace(5) alloca.
+define void @with_alloca_call_with_alloca() {
+; GFX9-LABEL: define void @with_alloca_call_with_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_alloca_call_with_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @with_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry that has its own addrspace(5) alloca (with a volatile
+; store to it) and also calls @with_alloca, which contains another
+; addrspace(5) alloca.
+define amdgpu_kernel void @with_alloca_call_with_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @with_alloca_call_with_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_alloca_call_with_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @with_alloca()
+  ret void
+}
+
+; Has its own addrspace(5) alloca (with a volatile store to it) and calls
+; @call_without_alloca, which in turn calls @without_alloca.
+define void @with_alloca_call_call_without_alloca() {
+; GFX9-LABEL: define void @with_alloca_call_call_without_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_alloca_call_call_without_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @call_without_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry that has its own addrspace(5) alloca (with a volatile
+; store to it) and calls @call_without_alloca, which in turn calls
+; @without_alloca.
+define amdgpu_kernel void @with_alloca_call_call_without_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @with_alloca_call_call_without_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_alloca_call_call_without_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @call_without_alloca()
+  ret void
+}
+
+; Has its own addrspace(5) alloca (with a volatile store to it) and calls
+; @call_with_alloca, which in turn calls @with_alloca.
+define void @with_alloca_call_call_with_alloca() {
+; GFX9-LABEL: define void @with_alloca_call_call_with_alloca()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_alloca_call_call_with_alloca()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @call_with_alloca()
+  ret void
+}
+
+; amdgpu_kernel entry that has its own addrspace(5) alloca (with a volatile
+; store to it) and calls @call_with_alloca, which in turn calls @with_alloca.
+define amdgpu_kernel void @with_alloca_call_call_with_alloca_cc_kernel() {
+; GFX9-LABEL: define amdgpu_kernel void @with_alloca_call_call_with_alloca_cc_kernel()
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_alloca_call_call_with_alloca_cc_kernel()
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %temp = alloca i32, addrspace(5)
+  store volatile i32 0, ptr addrspace(5) %temp, align 4
+  call void @call_with_alloca()
+  ret void
+}
+
+;; tests of addrspacecast
+
+; No addrspacecast: the volatile store goes directly through the
+; addrspace(1) pointer argument. No alloca, no calls.
+define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @without_global_to_flat_addrspacecast(ptr addrspace(1) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI:[0-9]+]]
+  store volatile i32 0, ptr addrspace(1) %ptr
+  ret void
+}
+
+; amdgpu_kernel entry with no addrspacecast: the volatile store goes directly
+; through the addrspace(1) pointer argument. No alloca, no calls.
+define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @without_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI2:[0-9]+]]
+  store volatile i32 0, ptr addrspace(1) %ptr
+  ret void
+}
+
+; Casts the addrspace(1) pointer argument to a generic `ptr` with
+; addrspacecast and does the volatile store through the cast result.
+define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_global_to_flat_addrspacecast(ptr addrspace(1) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
+  store volatile i32 0, ptr %stof
+  ret void
+}
+
+; amdgpu_kernel entry that casts the addrspace(1) pointer argument to a
+; generic `ptr` with addrspacecast and stores (volatile) through the result.
+define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr) {
+; GFX9-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_global_to_flat_addrspacecast_cc_kernel(ptr addrspace(1) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
+  store volatile i32 0, ptr %stof
+  ret void
+}
+
+; No addrspacecast: the volatile store goes directly through the
+; addrspace(2) pointer argument. No alloca, no calls.
+define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
+; GFX9-LABEL: define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @without_region_to_flat_addrspacecast(ptr addrspace(2) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI:[0-9]+]]
+  store volatile i32 0, ptr addrspace(2) %ptr
+  ret void
+}
+
+; amdgpu_kernel entry with no addrspacecast: the volatile store goes directly
+; through the addrspace(2) pointer argument. No alloca, no calls.
+define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) {
+; GFX9-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @without_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NOFSI2:[0-9]+]]
+  store volatile i32 0, ptr addrspace(2) %ptr
+  ret void
+}
+
+; Casts the addrspace(2) pointer argument to a generic `ptr` with
+; addrspacecast and does the volatile store through the cast result.
+define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr) {
+; GFX9-LABEL: define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI:[0-9]+]]
+;
+; GFX10-LABEL: define void @with_region_to_flat_addrspacecast(ptr addrspace(2) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI:[0-9]+]]
+  %stof = addrspacecast ptr addrspace(2) %ptr to ptr
+  store volatile i32 0, ptr %stof
+  ret void
+}
+
+define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr) {
+; GFX9-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr)
+; GFX9-SAME:  #[[ATTR_GFX9_NO_NOFSI2:[0-9]+]]
+;
+; GFX10-LABEL: define amdgpu_kernel void @with_region_to_flat_addrspacecast_cc_kernel(ptr addrspace(2) %ptr)
+; GFX10-SAME:  #[[ATTR_GFX10_NO_NOFSI2:[0-9]+]]
+  %stof = addrspacecast ptr addrspace(2) %ptr to ptr
----------------
arsenm wrote:

Also add a test with the `llvm.amdgcn.addrspacecast.nonnull` intrinsic.

https://github.com/llvm/llvm-project/pull/94647


More information about the llvm-commits mailing list