[llvm] [AMDGPU] Change handling of unsupported non-compute shaders with HSA (PR #126798)

Robert Imschweiler via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 01:57:06 PST 2025


https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/126798

>From b45c11221ec8b19afbac965ead59e64d5b07312c Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 11 Feb 2025 14:22:41 -0600
Subject: [PATCH 1/4] [AMDGPU] Change handling of unsupported non-compute
 shaders with HSA

The previous handling in `SITargetLowering::LowerFormalArguments` only
reported a diagnostic message and continued execution by returning an
unusable `SDValue`. This resulted in LLVM crashing later with an
unrelated error. This commit changes the detection of an unsupported
non-compute shader to be a fatal error right away.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  5 +----
 .../CodeGen/AMDGPU/no-hsa-graphics-shaders.ll | 19 -------------------
 2 files changed, 1 insertion(+), 23 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b632c50dae0e3..746bde4de5e9f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2824,10 +2824,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
-    DiagnosticInfoUnsupported NoGraphicsHSA(
-        Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
-    DAG.getContext()->diagnose(NoGraphicsHSA);
-    return DAG.getEntryNode();
+    report_fatal_error("unsupported non-compute shaders with HSA");
   }
 
   SmallVector<ISD::InputArg, 16> Splits;
diff --git a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
deleted file mode 100644
index ee6a578c72859..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: not llc -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s
-
-; CHECK: in function pixel_s{{.*}}: unsupported non-compute shaders with HSA
-define amdgpu_ps void @pixel_shader() #0 {
-  ret void
-}
-
-; CHECK: in function vertex_s{{.*}}: unsupported non-compute shaders with HSA
-define amdgpu_vs void @vertex_shader() #0 {
-  ret void
-}
-
-; CHECK: in function geometry_s{{.*}}: unsupported non-compute shaders with HSA
-define amdgpu_gs void @geometry_shader() #0 {
-  ret void
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}

>From 891ba84f6cf50d9f03e958824a0e5a2f9e2b1225 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 12 Feb 2025 07:31:23 -0600
Subject: [PATCH 2/4] implement feedback

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   8 +-
 .../CodeGen/AMDGPU/no-hsa-graphics-shaders.ll | 174 ++++++++++++++++++
 2 files changed, 180 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 746bde4de5e9f..1eb940747bcef 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2822,9 +2822,13 @@ SDValue SITargetLowering::LowerFormalArguments(
   const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  bool IsUnsupportedHsa = false;
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
-    report_fatal_error("unsupported non-compute shaders with HSA");
+    DiagnosticInfoUnsupported NoGraphicsHSA(
+        Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
+    DAG.getContext()->diagnose(NoGraphicsHSA);
+    IsUnsupportedHsa = true;
   }
 
   SmallVector<ISD::InputArg, 16> Splits;
@@ -2933,7 +2937,7 @@ SDValue SITargetLowering::LowerFormalArguments(
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
     const ISD::InputArg &Arg = Ins[i];
-    if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
+    if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) || IsUnsupportedHsa) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
       continue;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
new file mode 100644
index 0000000000000..aa8aab537669a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --tool not --version 5
+; RUN: not llc -mtriple=amdgcn-unknown-amdhsa -O0 -stop-after=amdgpu-isel -o - < %s 2>&1 | FileCheck %s
+
+@I = global i32 42
+@P = global ptr @I
+
+; CHECK: error: <unknown>:0:0: in function pixel_shader_zero_args void (): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function pixel_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function pixel_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function vertex_shader_zero_args void (): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function vertex_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function vertex_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function geometry_shader_zero_args void (): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function geometry_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
+; CHECK: error: <unknown>:0:0: in function geometry_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
+
+; CHECK-LABEL: name: pixel_shader_zero_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %5:vreg_64 = COPY %3
+; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
+; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %8:vreg_64 = COPY %7
+; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_ps void @pixel_shader_zero_args() {
+  %i = load i32, ptr @I
+  store i32 %i, ptr @P
+  ret void
+}
+
+; CHECK-LABEL: name: pixel_shader_one_arg
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
+; CHECK-NEXT:  %2:vreg_64 = COPY %3
+; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %11:vreg_64 = COPY %9
+; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %12:vreg_64 = COPY %3
+; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_ps void @pixel_shader_one_arg(ptr %p) {
+  %i = load i32, ptr @I
+  store i32 %i, ptr %p
+  ret void
+}
+
+; CHECK-LABEL: name: pixel_shader_two_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
+; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %2:vreg_64 = COPY %4
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_ps void @pixel_shader_two_args(ptr %p, i32 %i) {
+  store i32 %i, ptr %p
+  ret void
+}
+
+; CHECK-LABEL: name: vertex_shader_zero_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %5:vreg_64 = COPY %3
+; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
+; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %8:vreg_64 = COPY %7
+; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_vs void @vertex_shader_zero_args() {
+  %i = load i32, ptr @I
+  store i32 %i, ptr @P
+  ret void
+}
+
+; CHECK-LABEL: name: vertex_shader_one_arg
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
+; CHECK-NEXT:  %2:vreg_64 = COPY %3
+; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %11:vreg_64 = COPY %9
+; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %12:vreg_64 = COPY %3
+; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_vs void @vertex_shader_one_arg(ptr %p) {
+  %i = load i32, ptr @I
+  store i32 %i, ptr %p
+  ret void
+}
+
+; CHECK-LABEL: name: vertex_shader_two_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
+; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %2:vreg_64 = COPY %4
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_vs void @vertex_shader_two_args(ptr %p, i32 %i) {
+  store i32 %i, ptr %p
+  ret void
+}
+
+; CHECK-LABEL: name: geometry_shader_zero_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %5:vreg_64 = COPY %3
+; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
+; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %8:vreg_64 = COPY %7
+; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_gs void @geometry_shader_zero_args() {
+  %i = load i32, ptr @I
+  store i32 %i, ptr @P
+  ret void
+}
+
+; CHECK-LABEL: name: geometry_shader_one_arg
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
+; CHECK-NEXT:  %2:vreg_64 = COPY %3
+; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
+; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+; CHECK-NEXT:  %11:vreg_64 = COPY %9
+; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
+; CHECK-NEXT:  %12:vreg_64 = COPY %3
+; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_gs void @geometry_shader_one_arg(ptr %p) {
+  %i = load i32, ptr @I
+  store i32 %i, ptr %p
+  ret void
+}
+
+; CHECK-LABEL: name: geometry_shader_two_args
+; CHECK:  bb.0 (%ir-block.0):
+; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
+; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT:  %2:vreg_64 = COPY %4
+; CHECK-NEXT:  S_ENDPGM 0
+define amdgpu_gs void @geometry_shader_two_args(ptr %p, i32 %i) {
+  store i32 %i, ptr %p
+  ret void
+}

>From 61161b8fb56e2aa8b681b06aebaff1dec5b863b9 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 12 Feb 2025 07:35:16 -0600
Subject: [PATCH 3/4] fix formatting

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1eb940747bcef..cc9cab2107c0d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2937,7 +2937,8 @@ SDValue SITargetLowering::LowerFormalArguments(
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
     const ISD::InputArg &Arg = Ins[i];
-    if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) || IsUnsupportedHsa) {
+    if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) ||
+        IsUnsupportedHsa) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
       continue;
     }

>From a2217826f083a80e022a04707cd0a0d4f55fb96d Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 13 Feb 2025 03:56:28 -0600
Subject: [PATCH 4/4] implement feedback

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   7 +-
 .../CodeGen/AMDGPU/no-hsa-graphics-shaders.ll | 111 +-----------------
 2 files changed, 4 insertions(+), 114 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cc9cab2107c0d..afed489d502cc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2822,13 +2822,13 @@ SDValue SITargetLowering::LowerFormalArguments(
   const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  bool IsUnsupportedHsa = false;
+  bool IsError = false;
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
     DiagnosticInfoUnsupported NoGraphicsHSA(
         Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
     DAG.getContext()->diagnose(NoGraphicsHSA);
-    IsUnsupportedHsa = true;
+    IsError = true;
   }
 
   SmallVector<ISD::InputArg, 16> Splits;
@@ -2937,8 +2937,7 @@ SDValue SITargetLowering::LowerFormalArguments(
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
     const ISD::InputArg &Arg = Ins[i];
-    if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) ||
-        IsUnsupportedHsa) {
+    if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) || IsError) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
       continue;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
index aa8aab537669a..60d1df11bfddf 100644
--- a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
@@ -1,5 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --tool not --version 5
-; RUN: not llc -mtriple=amdgcn-unknown-amdhsa -O0 -stop-after=amdgpu-isel -o - < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-unknown-amdhsa -O0 -filetype=null < %s 2>&1 | FileCheck %s
 
 @I = global i32 42
 @P = global ptr @I
@@ -14,160 +13,52 @@
 ; CHECK: error: <unknown>:0:0: in function geometry_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
 ; CHECK: error: <unknown>:0:0: in function geometry_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
 
-; CHECK-LABEL: name: pixel_shader_zero_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %5:vreg_64 = COPY %3
-; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
-; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %8:vreg_64 = COPY %7
-; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_ps void @pixel_shader_zero_args() {
   %i = load i32, ptr @I
   store i32 %i, ptr @P
   ret void
 }
 
-; CHECK-LABEL: name: pixel_shader_one_arg
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
-; CHECK-NEXT:  %2:vreg_64 = COPY %3
-; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %11:vreg_64 = COPY %9
-; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %12:vreg_64 = COPY %3
-; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_ps void @pixel_shader_one_arg(ptr %p) {
   %i = load i32, ptr @I
   store i32 %i, ptr %p
   ret void
 }
 
-; CHECK-LABEL: name: pixel_shader_two_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
-; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %2:vreg_64 = COPY %4
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_ps void @pixel_shader_two_args(ptr %p, i32 %i) {
   store i32 %i, ptr %p
   ret void
 }
 
-; CHECK-LABEL: name: vertex_shader_zero_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %5:vreg_64 = COPY %3
-; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
-; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %8:vreg_64 = COPY %7
-; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_vs void @vertex_shader_zero_args() {
   %i = load i32, ptr @I
   store i32 %i, ptr @P
   ret void
 }
 
-; CHECK-LABEL: name: vertex_shader_one_arg
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
-; CHECK-NEXT:  %2:vreg_64 = COPY %3
-; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %11:vreg_64 = COPY %9
-; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %12:vreg_64 = COPY %3
-; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_vs void @vertex_shader_one_arg(ptr %p) {
   %i = load i32, ptr @I
   store i32 %i, ptr %p
   ret void
 }
 
-; CHECK-LABEL: name: vertex_shader_two_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
-; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %2:vreg_64 = COPY %4
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_vs void @vertex_shader_two_args(ptr %p, i32 %i) {
   store i32 %i, ptr %p
   ret void
 }
 
-; CHECK-LABEL: name: geometry_shader_zero_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %5:vreg_64 = COPY %3
-; CHECK-NEXT:  %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
-; CHECK-NEXT:  %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %8:vreg_64 = COPY %7
-; CHECK-NEXT:  FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_gs void @geometry_shader_zero_args() {
   %i = load i32, ptr @I
   store i32 %i, ptr @P
   ret void
 }
 
-; CHECK-LABEL: name: geometry_shader_one_arg
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %4:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
-; CHECK-NEXT:  %2:vreg_64 = COPY %3
-; CHECK-NEXT:  %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
-; CHECK-NEXT:  %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
-; CHECK-NEXT:  %11:vreg_64 = COPY %9
-; CHECK-NEXT:  %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
-; CHECK-NEXT:  %12:vreg_64 = COPY %3
-; CHECK-NEXT:  FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_gs void @geometry_shader_one_arg(ptr %p) {
   %i = load i32, ptr @I
   store i32 %i, ptr %p
   ret void
 }
 
-; CHECK-LABEL: name: geometry_shader_two_args
-; CHECK:  bb.0 (%ir-block.0):
-; CHECK-NEXT:  %5:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %6:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %7:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %8:sreg_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
-; CHECK-NEXT:  %3:vgpr_32 = IMPLICIT_DEF
-; CHECK-NEXT:  %2:vreg_64 = COPY %4
-; CHECK-NEXT:  S_ENDPGM 0
 define amdgpu_gs void @geometry_shader_two_args(ptr %p, i32 %i) {
   store i32 %i, ptr %p
   ret void



More information about the llvm-commits mailing list