[clang] f4bcd7f - AMDGPU/clang: Add builtins for llvm.amdgcn.ballot
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu Dec 29 14:59:00 PST 2022
Author: Matt Arsenault
Date: 2022-12-29T17:58:55-05:00
New Revision: f4bcd7f598331457cfe74e459b489d4098369511
URL: https://github.com/llvm/llvm-project/commit/f4bcd7f598331457cfe74e459b489d4098369511
DIFF: https://github.com/llvm/llvm-project/commit/f4bcd7f598331457cfe74e459b489d4098369511.diff
LOG: AMDGPU/clang: Add builtins for llvm.amdgcn.ballot
Use explicit _w32/_w64 suffixes for the wave size to be consistent
with the other existing wave-dependent intrinsics. Also start
diagnosing attempts to enable both wave32 and wave64 at the same time.
I would have preferred to avoid the +wavefrontsize64 spam on targets
where that's the only option, but avoiding this seems to be more work
than I expected.
Added:
clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGenOpenCL/amdgpu-features.cl
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
clang/test/OpenMP/amdgcn-attributes.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 5e64f830fb850..9e717099f7777 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -120,12 +120,6 @@ BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_setprio, "vIs", "n")
-BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
-BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
@@ -155,6 +149,21 @@ BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
+//===----------------------------------------------------------------------===//
+// Ballot builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64")
+
+// Deprecated intrinsics in favor of __builtin_amdgcn_ballot_{w32|w64}
+BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
+BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
+
//===----------------------------------------------------------------------===//
// CI+ only builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index cd9a7e8d93f2e..ecb786482c9a3 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -13,6 +13,7 @@
#include "AMDGPU.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
@@ -178,6 +179,8 @@ ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
bool AMDGPUTargetInfo::initFeatureMap(
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
const std::vector<std::string> &FeatureVec) const {
+ const bool IsNullCPU = CPU.empty();
+ bool IsWave32Capable = false;
using namespace llvm::AMDGPU;
@@ -188,6 +191,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ IsWave32Capable = true;
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot5-insts"] = true;
@@ -211,6 +215,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ IsWave32Capable = true;
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -238,6 +243,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ IsWave32Capable = true;
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["flat-address-space"] = true;
@@ -334,7 +340,32 @@ bool AMDGPUTargetInfo::initFeatureMap(
}
}
- return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
+ if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
+ return false;
+
+ // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+ const bool HaveWave32 =
+ (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
+ const bool HaveWave64 = Features.count("wavefrontsize64");
+
+ // TODO: Should move this logic into TargetParser
+ if (HaveWave32 && HaveWave64) {
+ Diags.Report(diag::err_invalid_feature_combination)
+ << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+ return false;
+ }
+
+ // Don't assume any wavesize with an unknown subtarget.
+ if (!IsNullCPU) {
+ // Default to wave32 if available, or wave64 if not
+ if (!HaveWave32 && !HaveWave64) {
+ StringRef DefaultWaveSizeFeature =
+ IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
+ Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+ }
+ }
+
+ return true;
}
void AMDGPUTargetInfo::fillValidCPUList(
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 96d4a6845fad9..cb2dd54e0f735 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -16989,6 +16989,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
case AMDGPU::BI__builtin_amdgcn_sbfe:
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
+ case AMDGPU::BI__builtin_amdgcn_ballot_w32:
+ case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
+ return Builder.CreateCall(F, { Src });
+ }
case AMDGPU::BI__builtin_amdgcn_uicmp:
case AMDGPU::BI__builtin_amdgcn_uicmpl:
case AMDGPU::BI__builtin_amdgcn_sicmp:
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
new file mode 100644
index 0000000000000..7dbf5c3c6cd59
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
+
+// CHECK: error: invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive
+
+kernel void test() {}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index ff288e530d17f..46402c377c335 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -3,6 +3,10 @@
// Check that appropriate features are defined for every supported AMDGPU
// "-target" and "-mcpu" options.
+// RUN: %clang_cc1 -triple amdgcn -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
+
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx602 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s
@@ -42,43 +46,50 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1102 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
-// GFX600: "target-features"="+s-memtime-inst"
-// GFX601: "target-features"="+s-memtime-inst"
-// GFX602: "target-features"="+s-memtime-inst"
-// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX704: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
+
+// NOCPU-NOT: "target-features"
+// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
+// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
+
+// GFX600: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX601: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX602: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX704: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
kernel void test() {}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
index 3b21d41659bbd..c2ded5c20238a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -37,3 +37,10 @@ void test_groupstaticsize(global uint* out)
{
*out = __builtin_amdgcn_groupstaticsize();
}
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+ *out = __builtin_amdgcn_ballot_w32(a == b);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
new file mode 100644
index 0000000000000..a4d14cf1f6cf0
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned int uint;
+
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+ *out = __builtin_amdgcn_ballot_w32(a == b);
+}
+
+// CHECK-LABEL: @test_ballot_wave32_target_attr(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+__attribute__((target("wavefrontsize32")))
+void test_ballot_wave32_target_attr(global uint* out, int a, int b)
+{
+ *out = __builtin_amdgcn_ballot_w32(a == b);
+}
+
+#if __AMDGCN_WAVEFRONT_SIZE != 32
+#error Wrong wavesize detected
+#endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
new file mode 100644
index 0000000000000..563c9a2a240c1
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned long ulong;
+
+// CHECK-LABEL: @test_ballot_wave64(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
+void test_ballot_wave64(global ulong* out, int a, int b)
+{
+ *out = __builtin_amdgcn_ballot_w64(a == b);
+}
+
+// CHECK-LABEL: @test_ballot_wave64_target_attr(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
+__attribute__((target("wavefrontsize64")))
+void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
+{
+ *out = __builtin_amdgcn_ballot_w64(a == b);
+}
+
+#if __AMDGCN_WAVEFRONT_SIZE != 64
+#error Wrong wavesize detected
+#endif
diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index b285e494a7fde..b5ff6a58381b8 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -33,11 +33,11 @@ int callable(int x) {
}
// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
new file mode 100644
index 0000000000000..543d64403fd2e
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
+
+typedef unsigned int uint;
+
+void test_ballot_wave32(global uint* out, int a, int b) {
+ *out = __builtin_amdgcn_ballot_w32(a == b); // expected-error {{'__builtin_amdgcn_ballot_w32' needs target feature wavefrontsize32}}
+}
+
+// FIXME: Should error for subtargets that don't support wave32
+__attribute__((target("wavefrontsize32")))
+void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
+ *out = __builtin_amdgcn_ballot_w32(a == b);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
new file mode 100644
index 0000000000000..99e93acd9a213
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-feature +wavefrontsize32 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize32 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+
+typedef unsigned long ulong;
+
+void test_ballot_wave64(global ulong* out, int a, int b) {
+ *out = __builtin_amdgcn_ballot_w64(a == b); // expected-error {{'__builtin_amdgcn_ballot_w64' needs target feature wavefrontsize64}}
+}
+
+__attribute__((target("wavefrontsize64")))
+void test_ballot_wave64_target_attr(global ulong* out, int a, int b) {
+ *out = __builtin_amdgcn_ballot_w64(a == b);
+}
More information about the cfe-commits
mailing list