[clang] f4bcd7f - AMDGPU/clang: Add builtins for llvm.amdgcn.ballot

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 29 14:59:00 PST 2022


Author: Matt Arsenault
Date: 2022-12-29T17:58:55-05:00
New Revision: f4bcd7f598331457cfe74e459b489d4098369511

URL: https://github.com/llvm/llvm-project/commit/f4bcd7f598331457cfe74e459b489d4098369511
DIFF: https://github.com/llvm/llvm-project/commit/f4bcd7f598331457cfe74e459b489d4098369511.diff

LOG: AMDGPU/clang: Add builtins for llvm.amdgcn.ballot

Use explicit _w32/_w64 suffixes for the wave size to be consistent
with the existing other wave dependent intrinsics. Also start
diagnosing trying to use both wave32 and wave64.

I would have preferred to avoid the +wavefrontsize64 spam on targets
where that's the only option, but avoiding this seems to be more work
than I expected.

Added: 
    clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
    clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
    clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/lib/Basic/Targets/AMDGPU.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGenOpenCL/amdgpu-features.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
    clang/test/OpenMP/amdgcn-attributes.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 5e64f830fb850..9e717099f7777 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -120,12 +120,6 @@ BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_setprio, "vIs", "n")
-BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
-BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
 BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
@@ -155,6 +149,21 @@ BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
 
+//===----------------------------------------------------------------------===//
+// Ballot builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64")
+
+// Deprecated intrinsics in favor of __builtin_amdgn_ballot_{w32|w64}
+BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
+BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
+
 //===----------------------------------------------------------------------===//
 // CI+ only builtins.
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index cd9a7e8d93f2e..ecb786482c9a3 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -13,6 +13,7 @@
 #include "AMDGPU.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
@@ -178,6 +179,8 @@ ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
 bool AMDGPUTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeatureVec) const {
+  const bool IsNullCPU = CPU.empty();
+  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
 
@@ -188,6 +191,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
     case GK_GFX1102:
     case GK_GFX1101:
     case GK_GFX1100:
+      IsWave32Capable = true;
       Features["ci-insts"] = true;
       Features["dot1-insts"] = true;
       Features["dot5-insts"] = true;
@@ -211,6 +215,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
     case GK_GFX1032:
     case GK_GFX1031:
     case GK_GFX1030:
+      IsWave32Capable = true;
       Features["ci-insts"] = true;
       Features["dot1-insts"] = true;
       Features["dot2-insts"] = true;
@@ -238,6 +243,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
       [[fallthrough]];
     case GK_GFX1013:
     case GK_GFX1010:
+      IsWave32Capable = true;
       Features["dl-insts"] = true;
       Features["ci-insts"] = true;
       Features["flat-address-space"] = true;
@@ -334,7 +340,32 @@ bool AMDGPUTargetInfo::initFeatureMap(
     }
   }
 
-  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
+  if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
+    return false;
+
+  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+  const bool HaveWave32 =
+      (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
+  const bool HaveWave64 = Features.count("wavefrontsize64");
+
+  // TODO: Should move this logic into TargetParser
+  if (HaveWave32 && HaveWave64) {
+    Diags.Report(diag::err_invalid_feature_combination)
+        << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+    return false;
+  }
+
+  // Don't assume any wavesize with an unknown subtarget.
+  if (!IsNullCPU) {
+    // Default to wave32 if available, or wave64 if not
+    if (!HaveWave32 && !HaveWave64) {
+      StringRef DefaultWaveSizeFeature =
+          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
+      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+    }
+  }
+
+  return true;
 }
 
 void AMDGPUTargetInfo::fillValidCPUList(

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 96d4a6845fad9..cb2dd54e0f735 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -16989,6 +16989,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
   case AMDGPU::BI__builtin_amdgcn_sbfe:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
+  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
+  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
+    return Builder.CreateCall(F, { Src });
+  }
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:
   case AMDGPU::BI__builtin_amdgcn_sicmp:

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
new file mode 100644
index 0000000000000..7dbf5c3c6cd59
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
+
+// CHECK: error: invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive
+
+kernel void test() {}

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index ff288e530d17f..46402c377c335 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -3,6 +3,10 @@
 // Check that appropriate features are defined for every supported AMDGPU
 // "-target" and "-mcpu" options.
 
+// RUN: %clang_cc1 -triple amdgcn -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
+// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
+
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx602 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s
@@ -42,43 +46,50 @@
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1102 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
 
-// GFX600: "target-features"="+s-memtime-inst"
-// GFX601: "target-features"="+s-memtime-inst"
-// GFX602: "target-features"="+s-memtime-inst"
-// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX704: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst"
-// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
-// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
-// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts"
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
+
+// NOCPU-NOT: "target-features"
+// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
+// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
+
+// GFX600: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX601: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX602: "target-features"="+s-memtime-inst,+wavefrontsize64"
+// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX704: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
+// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
 
 kernel void test() {}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
index 3b21d41659bbd..c2ded5c20238a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -37,3 +37,10 @@ void test_groupstaticsize(global uint* out)
 {
   *out = __builtin_amdgcn_groupstaticsize();
 }
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_w32(a == b);
+}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
new file mode 100644
index 0000000000000..a4d14cf1f6cf0
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned int uint;
+
+
+// CHECK-LABEL: @test_ballot_wave32(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+void test_ballot_wave32(global uint* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_w32(a == b);
+}
+
+// CHECK-LABEL: @test_ballot_wave32_target_attr(
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
+__attribute__((target("wavefrontsize32")))
+void test_ballot_wave32_target_attr(global uint* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_w32(a == b);
+}
+
+#if __AMDGCN_WAVEFRONT_SIZE != 32
+#error Wrong wavesize detected
+#endif

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
new file mode 100644
index 0000000000000..563c9a2a240c1
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
+
+typedef unsigned long ulong;
+
+// CHECK-LABEL: @test_ballot_wave64(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
+void test_ballot_wave64(global ulong* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_w64(a == b);
+}
+
+// CHECK-LABEL: @test_ballot_wave64_target_attr(
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}})
+__attribute__((target("wavefrontsize64")))
+void test_ballot_wave64_target_attr(global ulong* out, int a, int b)
+{
+  *out = __builtin_amdgcn_ballot_w64(a == b);
+}
+
+#if __AMDGCN_WAVEFRONT_SIZE != 64
+#error Wrong wavesize detected
+#endif

diff  --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index b285e494a7fde..b5ff6a58381b8 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -33,11 +33,11 @@ int callable(int x) {
 }
 
 // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
 // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 
 // DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
 // NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
new file mode 100644
index 0000000000000..543d64403fd2e
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
+
+typedef unsigned int uint;
+
+void test_ballot_wave32(global uint* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_w32(a == b);  // expected-error {{'__builtin_amdgcn_ballot_w32' needs target feature wavefrontsize32}}
+}
+
+// FIXME: Should error for subtargets that don't support wave32
+__attribute__((target("wavefrontsize32")))
+void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_w32(a == b);
+}

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
new file mode 100644
index 0000000000000..99e93acd9a213
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-feature +wavefrontsize32 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize32 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+
+typedef unsigned long ulong;
+
+void test_ballot_wave64(global ulong* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_w64(a == b);  // expected-error {{'__builtin_amdgcn_ballot_w64' needs target feature wavefrontsize64}}
+}
+
+__attribute__((target("wavefrontsize64")))
+void test_ballot_wave64_target_attr(global ulong* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_w64(a == b);
+}


        


More information about the cfe-commits mailing list