[clang] 6fe70cb - clang/AMDGPU: Force disable block enqueue arguments for HIP
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Sat Jan 7 10:39:10 PST 2023
Author: Matt Arsenault
Date: 2023-01-07T13:39:05-05:00
New Revision: 6fe70cb465654eafafd272231e23762adeab4290
URL: https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290
DIFF: https://github.com/llvm/llvm-project/commit/6fe70cb465654eafafd272231e23762adeab4290.diff
LOG: clang/AMDGPU: Force disable block enqueue arguments for HIP
This is a dirty, dirty hack to workaround bot failures at
-O0. Currently these fields are only used by OpenCL features and
evidently the HIP runtime isn't expecting to see them in HIP
programs. The code objects should be language agnostic, so just force
optimize these out until the runtime is fixed.
Added:
clang/test/CodeGenHIP/default-attributes.hip
Modified:
clang/lib/CodeGen/TargetInfo.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index aec170ae5570..ee8852903eda 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9520,6 +9520,15 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (IsHIPKernel) {
+ // FIXME: This is a dirty, dirty hack to fix bot failures at -O0 and should
+ // be removed. The HIP runtime currently fails to handle the case where one
+ // of these fields fails to optimize out. The runtime should tolerate all
+ // requested implicit inputs regardless of language.
+ F->addFnAttr("amdgpu-no-default-queue");
+ F->addFnAttr("amdgpu-no-completion-action");
+ }
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
new file mode 100644
index 000000000000..b4f4a6201956
--- /dev/null
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
+
+// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device \
+// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+
+// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
+// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv
+// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] {
+// OPTNONE-NEXT: entry:
+// OPTNONE-NEXT: ret void
+//
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+// OPT-LABEL: define {{[^@]+}}@_Z4funcv
+// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// OPT-NEXT: entry:
+// OPT-NEXT: ret void
+//
+__device__ void func() {
+
+}
+
+// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv
+// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] {
+// OPTNONE-NEXT: entry:
+// OPTNONE-NEXT: ret void
+//
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
+// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// OPT-NEXT: entry:
+// OPT-NEXT: ret void
+//
+__global__ void kernel() {
+
+}
+//.
+// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
+//.
+// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
More information about the cfe-commits
mailing list