[llvm] 60a227c - [AMDGPU] Use inreg for hint to preload kernel arguments

Austin Kerbow via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 19 15:14:04 PDT 2023


Author: Austin Kerbow
Date: 2023-09-19T15:13:38-07:00
New Revision: 60a227c464a19a00a76d19b5bc75e0e4d5c89873

URL: https://github.com/llvm/llvm-project/commit/60a227c464a19a00a76d19b5bc75e0e4d5c89873
DIFF: https://github.com/llvm/llvm-project/commit/60a227c464a19a00a76d19b5bc75e0e4d5c89873.diff

LOG: [AMDGPU] Use inreg for hint to preload kernel arguments

This patch is the first in a series that adds support for pre-loading
kernel arguments into SGPRs. The command-line argument
'amdgpu-kernarg-preload-count' is used to specify the number of
arguments, counted sequentially from the first, that we should attempt to
preload; the default is 0.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156852

Added: 
    llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 57c873f00a4a195..d7dc37066b1fd38 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -28,6 +28,10 @@ void initializeCycleInfoWrapperPassPass(PassRegistry &);
 
 using namespace llvm;
 
+static cl::opt<unsigned> KernargPreloadCount(
+    "amdgpu-kernarg-preload-count",
+    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
+
 #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
 
 enum ImplicitArgumentPositions {
@@ -914,6 +918,21 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
+static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  for (unsigned I = 0;
+       I < F.arg_size() &&
+       I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
+       ++I) {
+    Argument &Arg = *F.getArg(I);
+    // byref/nest are incompatible with inreg; stop at the first such argument.
+    if (Arg.hasByRefAttr() || Arg.hasNestAttr())
+      break;
+
+    Arg.addAttr(Attribute::InReg);
+  }
+}
+
 class AMDGPUAttributor : public ModulePass {
 public:
   AMDGPUAttributor() : ModulePass(ID) {}
@@ -960,9 +979,12 @@ class AMDGPUAttributor : public ModulePass {
       if (!F.isIntrinsic()) {
         A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
         A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
-        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
+        CallingConv::ID CC = F.getCallingConv();
+        if (!AMDGPU::isEntryFunctionCC(CC)) {
           A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
           A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
+        } else if (CC == CallingConv::AMDGPU_KERNEL) {
+          addPreloadKernArgHint(F, *TM);
         }
       }
     }

diff  --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
new file mode 100644
index 000000000000000..1238fa8c49a928b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-attributor -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-1 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=3 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-20 %s
+
+define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
+; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
+; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
+; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
+; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
+; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
+; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
+; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
+; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
+; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
+; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
+; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
+; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
+; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
+; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
+; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
+; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
+; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
+; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
+; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
+; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
+; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
+; PRELOAD-1-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
+; PRELOAD-3-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
+; PRELOAD-16-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
+; PRELOAD-20-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
+; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
+; NO-PRELOAD-NEXT:    call void @func(ptr [[TMP0]])
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
+; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
+; PRELOAD-1-NEXT:    call void @func(ptr [[TMP0]])
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
+; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
+; PRELOAD-3-NEXT:    call void @func(ptr [[TMP0]])
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
+; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
+; PRELOAD-16-NEXT:    call void @func(ptr [[TMP0]])
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
+; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
+; PRELOAD-20-NEXT:    call void @func(ptr [[TMP0]])
+; PRELOAD-20-NEXT:    ret void
+;
+  call void @func(ptr %0)
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
+; NO-PRELOAD-SAME: (i16 [[TMP0:%.*]]) #[[ATTR2]] {
+; NO-PRELOAD-NEXT:    call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
+; PRELOAD-1-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
+; PRELOAD-1-NEXT:    call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
+; PRELOAD-3-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
+; PRELOAD-3-NEXT:    call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
+; PRELOAD-16-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
+; PRELOAD-16-NEXT:    call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
+; PRELOAD-20-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] {
+; PRELOAD-20-NEXT:    call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
+; PRELOAD-20-NEXT:    ret void
+;
+  call void @llvm.amdgcn.set.prio(i16 %0)
+  ret void
+}
+
+define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
+; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
+; PRELOAD-1-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
+; PRELOAD-3-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
+; PRELOAD-16-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
+; PRELOAD-20-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) #0 {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
+; NO-PRELOAD-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
+; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
+; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
+; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
+; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr addrspace(4) byref(i32) %0, ptr nest %1) {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
+; NO-PRELOAD-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; NO-PRELOAD-NEXT:    ret void
+;
+; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
+; PRELOAD-1-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; PRELOAD-1-NEXT:    ret void
+;
+; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
+; PRELOAD-3-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; PRELOAD-3-NEXT:    ret void
+;
+; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
+; PRELOAD-16-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; PRELOAD-16-NEXT:    ret void
+;
+; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
+; PRELOAD-20-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+; PRELOAD-20-NEXT:    ret void
+;
+  ret void
+}
+
+declare void @func(ptr) #0
+declare void @llvm.amdgcn.set.prio(i16)
+
+attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list