[clang] 1a368ae - [CUDA] fix builtin constraints for PTX 7.2
Jordan Rupprecht via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 19 09:59:45 PST 2021
Author: Artem Belevich
Date: 2021-02-19T09:57:21-08:00
New Revision: 1a368ae3b78dd7a364e8f17658fddaf86b1e98db
URL: https://github.com/llvm/llvm-project/commit/1a368ae3b78dd7a364e8f17658fddaf86b1e98db
DIFF: https://github.com/llvm/llvm-project/commit/1a368ae3b78dd7a364e8f17658fddaf86b1e98db.diff
LOG: [CUDA] fix builtin constraints for PTX 7.2
This fixes build issues with CUDA-11 introduced by https://reviews.llvm.org/D95974
Reviewed By: yaxunl
Differential Revision: https://reviews.llvm.org/D97009
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/test/CodeGen/builtins-nvptx-sm_70.cu
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 44a5e4ae01c1..b225ddcfa3fa 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -38,7 +38,9 @@
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
-#define PTX71 "ptx71"
+#pragma push_macro("PTX72")
+#define PTX72 "ptx72"
+#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
@@ -740,3 +742,4 @@ TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
#pragma pop_macro("PTX65")
#pragma pop_macro("PTX70")
#pragma pop_macro("PTX71")
+#pragma pop_macro("PTX72")
diff --git a/clang/test/CodeGen/builtins-nvptx-sm_70.cu b/clang/test/CodeGen/builtins-nvptx-sm_70.cu
index bd6b2c2b1a49..9de9a70190e2 100644
--- a/clang/test/CodeGen/builtins-nvptx-sm_70.cu
+++ b/clang/test/CodeGen/builtins-nvptx-sm_70.cu
@@ -6,6 +6,11 @@
// RUN: -fcuda-is-device -target-feature +ptx61 -DPTX61 \
// RUN: -S -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
+// Make sure builtins still work with the latest combination of GPU & PTX.
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_86 \
+// RUN: -fcuda-is-device -target-feature +ptx72 -DPTX61 \
+// RUN: -S -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
// RUN: -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-sm_70 %s
// RUN: %clang_cc1 -triple nvptx-unknown-unknown \
More information about the cfe-commits mailing list