[llvm-branch-commits] [clang] [flang] [llvm] [AMDGPU] More radical feature initialization refactoring (PR #155222)
    Stanislav Mekhanoshin via llvm-branch-commits 
    llvm-branch-commits at lists.llvm.org
       
    Mon Aug 25 01:20:21 PDT 2025
    
    
  
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/155222
Factoring in flang, just have a single fillAMDGPUFeatureMap
function doing it all as an external interface and returning
an error.
>From 24dfb4bc7e3c0b1e20d5da574757ee527d499ff9 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Mon, 25 Aug 2025 01:17:09 -0700
Subject: [PATCH] [AMDGPU] More radical feature initialization refactoring
Factoring in flang, just have a single fillAMDGPUFeatureMap
function doing it all as an external interface and returning
an error.
---
 clang/lib/Basic/Targets/AMDGPU.cpp            |   3 +-
 flang/lib/Frontend/CompilerInstance.cpp       |  14 +-
 llvm/include/llvm/TargetParser/TargetParser.h |   8 +-
 llvm/lib/TargetParser/TargetParser.cpp        | 627 +++++++++---------
 4 files changed, 325 insertions(+), 327 deletions(-)
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index a235cccac516b..87de9e6865e71 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -201,8 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
     return false;
 
-  // TODO: Should move this logic into TargetParser
-  auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
+  auto HasError = fillAMDGPUFeatureMap(CPU, getTriple(), Features);
   switch (HasError.first) {
   default:
     break;
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index cd8dddad05282..f6a9dd0ebc70c 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -253,18 +253,16 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
                                            const TargetOptions &targetOpts,
                                            const llvm::Triple triple) {
   llvm::StringRef cpu = targetOpts.cpu;
-  llvm::StringMap<bool> implicitFeaturesMap;
-  // Get the set of implicit target features
-  llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
+  llvm::StringMap<bool> FeaturesMap;
 
   // Add target features specified by the user
   for (auto &userFeature : targetOpts.featuresAsWritten) {
     std::string userKeyString = userFeature.substr(1);
-    implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
+    FeaturesMap[userKeyString] = (userFeature[0] == '+');
   }
 
   auto HasError =
-      llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap);
+      llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, FeaturesMap);
   if (HasError.first) {
     unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
                                             "Unsupported feature ID: %0");
@@ -273,9 +271,9 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
   }
 
   llvm::SmallVector<std::string> featuresVec;
-  for (auto &implicitFeatureItem : implicitFeaturesMap) {
-    featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
-                           implicitFeatureItem.first().str())
+  for (auto &FeatureItem : FeaturesMap) {
+    featuresVec.push_back((llvm::Twine(FeatureItem.second ? "+" : "-") +
+                           FeatureItem.first().str())
                               .str());
   }
   llvm::sort(featuresVec);
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 2f68d66dee90f..0739207e26b5f 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -184,14 +184,8 @@ LLVM_ABI void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
 LLVM_ABI IsaVersion getIsaVersion(StringRef GPU);
 
 /// Fills Features map with default values for given target GPU
-LLVM_ABI void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
-                                   StringMap<bool> &Features);
-
-/// Inserts wave size feature for given GPU into features map
 LLVM_ABI std::pair<FeatureError, StringRef>
-insertWaveSizeFeature(StringRef GPU, const Triple &T,
-                      StringMap<bool> &Features);
-
+fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap<bool> &Features);
 } // namespace AMDGPU
 
 struct BasicSubtargetFeatureKV {
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 480622d6338fc..8de28adcfb110 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -364,8 +364,320 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
   return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
 }
 
-void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
-                                  StringMap<bool> &Features) {
+static std::pair<FeatureError, StringRef>
+insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                      const StringMap<bool> &DefaultFeatures,
+                      StringMap<bool> &Features) {
+  const bool IsNullGPU = GPU.empty();
+  const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32");
+  const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64");
+  const bool HaveWave32 = Features.count("wavefrontsize32");
+  const bool HaveWave64 = Features.count("wavefrontsize64");
+  if (HaveWave32 && HaveWave64)
+    return {AMDGPU::INVALID_FEATURE_COMBINATION,
+            "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
+
+  if (HaveWave32 && !IsNullGPU && TargetHasWave64)
+    return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
+
+  if (HaveWave64 && !IsNullGPU && TargetHasWave32)
+    return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"};
+
+  // Don't assume any wavesize with an unknown subtarget.
+  // Default to wave32 if target supports both.
+  if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
+      !TargetHasWave64)
+    Features.insert(std::make_pair("wavefrontsize32", true));
+
+  for (const auto &Entry : DefaultFeatures) {
+    if (!Features.count(Entry.getKey()))
+      Features[Entry.getKey()] = Entry.getValue();
+  }
+
+  return {NO_ERROR, StringRef()};
+}
+
+static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
+                                 StringMap<bool> &Features) {
+  AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
+  switch (Kind) {
+  case GK_GFX1250:
+    Features["ci-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot8-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["gfx10-3-insts"] = true;
+    Features["gfx11-insts"] = true;
+    Features["gfx12-insts"] = true;
+    Features["gfx1250-insts"] = true;
+    Features["bitop3-insts"] = true;
+    Features["prng-inst"] = true;
+    Features["tanh-insts"] = true;
+    Features["tensor-cvt-lut-insts"] = true;
+    Features["transpose-load-f4f6-insts"] = true;
+    Features["bf16-trans-insts"] = true;
+    Features["bf16-cvt-insts"] = true;
+    Features["fp8-conversion-insts"] = true;
+    Features["fp8e5m3-insts"] = true;
+    Features["permlane16-swap"] = true;
+    Features["ashr-pk-insts"] = true;
+    Features["atomic-buffer-pk-add-bf16-inst"] = true;
+    Features["vmem-pref-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+    Features["atomic-flat-pk-add-16-insts"] = true;
+    Features["atomic-global-pk-add-bf16-inst"] = true;
+    Features["atomic-ds-pk-add-16-insts"] = true;
+    Features["setprio-inc-wg-inst"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["wavefrontsize32"] = true;
+    break;
+  case GK_GFX1201:
+  case GK_GFX1200:
+  case GK_GFX12_GENERIC:
+    Features["ci-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot8-insts"] = true;
+    Features["dot9-insts"] = true;
+    Features["dot10-insts"] = true;
+    Features["dot11-insts"] = true;
+    Features["dot12-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["atomic-ds-pk-add-16-insts"] = true;
+    Features["atomic-flat-pk-add-16-insts"] = true;
+    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+    Features["atomic-buffer-pk-add-bf16-inst"] = true;
+    Features["atomic-global-pk-add-bf16-inst"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["gfx10-3-insts"] = true;
+    Features["gfx11-insts"] = true;
+    Features["gfx12-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["image-insts"] = true;
+    Features["fp8-conversion-insts"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    break;
+  case GK_GFX1153:
+  case GK_GFX1152:
+  case GK_GFX1151:
+  case GK_GFX1150:
+  case GK_GFX1103:
+  case GK_GFX1102:
+  case GK_GFX1101:
+  case GK_GFX1100:
+  case GK_GFX11_GENERIC:
+    Features["ci-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot8-insts"] = true;
+    Features["dot9-insts"] = true;
+    Features["dot10-insts"] = true;
+    Features["dot12-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["gfx10-3-insts"] = true;
+    Features["gfx11-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["image-insts"] = true;
+    Features["gws"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    break;
+  case GK_GFX1036:
+  case GK_GFX1035:
+  case GK_GFX1034:
+  case GK_GFX1033:
+  case GK_GFX1032:
+  case GK_GFX1031:
+  case GK_GFX1030:
+  case GK_GFX10_3_GENERIC:
+    Features["ci-insts"] = true;
+    Features["dot1-insts"] = true;
+    Features["dot2-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot6-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot10-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["gfx10-3-insts"] = true;
+    Features["image-insts"] = true;
+    Features["s-memrealtime"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["vmem-to-lds-load-insts"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    break;
+  case GK_GFX1012:
+  case GK_GFX1011:
+    Features["dot1-insts"] = true;
+    Features["dot2-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot6-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot10-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX1013:
+  case GK_GFX1010:
+  case GK_GFX10_1_GENERIC:
+    Features["dl-insts"] = true;
+    Features["ci-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["image-insts"] = true;
+    Features["s-memrealtime"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["vmem-to-lds-load-insts"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    break;
+  case GK_GFX950:
+    Features["bitop3-insts"] = true;
+    Features["fp6bf6-cvt-scale-insts"] = true;
+    Features["fp4-cvt-scale-insts"] = true;
+    Features["bf8-cvt-scale-insts"] = true;
+    Features["fp8-cvt-scale-insts"] = true;
+    Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
+    Features["f32-to-f16bf16-cvt-sr-insts"] = true;
+    Features["prng-inst"] = true;
+    Features["permlane16-swap"] = true;
+    Features["permlane32-swap"] = true;
+    Features["ashr-pk-insts"] = true;
+    Features["dot12-insts"] = true;
+    Features["dot13-insts"] = true;
+    Features["atomic-buffer-pk-add-bf16-inst"] = true;
+    Features["gfx950-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX942:
+    Features["fp8-insts"] = true;
+    Features["fp8-conversion-insts"] = true;
+    if (Kind != GK_GFX950)
+      Features["xf32-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX9_4_GENERIC:
+    Features["gfx940-insts"] = true;
+    Features["atomic-ds-pk-add-16-insts"] = true;
+    Features["atomic-flat-pk-add-16-insts"] = true;
+    Features["atomic-global-pk-add-bf16-inst"] = true;
+    Features["gfx90a-insts"] = true;
+    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["dot3-insts"] = true;
+    Features["dot4-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot6-insts"] = true;
+    Features["mai-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["dot1-insts"] = true;
+    Features["dot2-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot10-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx8-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["s-memrealtime"] = true;
+    Features["ci-insts"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["vmem-to-lds-load-insts"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["wavefrontsize64"] = true;
+    break;
+  case GK_GFX90A:
+    Features["gfx90a-insts"] = true;
+    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    [[fallthrough]];
+  case GK_GFX908:
+    Features["dot3-insts"] = true;
+    Features["dot4-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot6-insts"] = true;
+    Features["mai-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX906:
+    Features["dl-insts"] = true;
+    Features["dot1-insts"] = true;
+    Features["dot2-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot10-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX90C:
+  case GK_GFX909:
+  case GK_GFX904:
+  case GK_GFX902:
+  case GK_GFX900:
+  case GK_GFX9_GENERIC:
+    Features["gfx9-insts"] = true;
+    Features["vmem-to-lds-load-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX810:
+  case GK_GFX805:
+  case GK_GFX803:
+  case GK_GFX802:
+  case GK_GFX801:
+    Features["gfx8-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["s-memrealtime"] = true;
+    Features["ci-insts"] = true;
+    Features["image-insts"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["wavefrontsize64"] = true;
+    break;
+  case GK_GFX705:
+  case GK_GFX704:
+  case GK_GFX703:
+  case GK_GFX702:
+  case GK_GFX701:
+  case GK_GFX700:
+    Features["ci-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX602:
+  case GK_GFX601:
+  case GK_GFX600:
+    Features["image-insts"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["wavefrontsize64"] = true;
+    break;
+  case GK_NONE:
+    break;
+  default:
+    llvm_unreachable("Unhandled GPU!");
+  }
+}
+
+std::pair<FeatureError, StringRef>
+AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                             StringMap<bool> &Features) {
   // XXX - What does the member GPU mean if device name string passed here?
   if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
     // AMDGCN SPIRV must support the union of all AMDGCN features. This list
@@ -434,280 +746,9 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
     Features["wavefrontsize32"] = true;
     Features["wavefrontsize64"] = true;
   } else if (T.isAMDGCN()) {
-    AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
-    switch (Kind) {
-    case GK_GFX1250:
-      Features["ci-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      Features["gfx12-insts"] = true;
-      Features["gfx1250-insts"] = true;
-      Features["bitop3-insts"] = true;
-      Features["prng-inst"] = true;
-      Features["tanh-insts"] = true;
-      Features["tensor-cvt-lut-insts"] = true;
-      Features["transpose-load-f4f6-insts"] = true;
-      Features["bf16-trans-insts"] = true;
-      Features["bf16-cvt-insts"] = true;
-      Features["fp8-conversion-insts"] = true;
-      Features["fp8e5m3-insts"] = true;
-      Features["permlane16-swap"] = true;
-      Features["ashr-pk-insts"] = true;
-      Features["atomic-buffer-pk-add-bf16-inst"] = true;
-      Features["vmem-pref-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-      Features["atomic-flat-pk-add-16-insts"] = true;
-      Features["atomic-global-pk-add-bf16-inst"] = true;
-      Features["atomic-ds-pk-add-16-insts"] = true;
-      Features["setprio-inc-wg-inst"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      Features["wavefrontsize32"] = true;
-      break;
-    case GK_GFX1201:
-    case GK_GFX1200:
-    case GK_GFX12_GENERIC:
-      Features["ci-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dot9-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dot11-insts"] = true;
-      Features["dot12-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["atomic-ds-pk-add-16-insts"] = true;
-      Features["atomic-flat-pk-add-16-insts"] = true;
-      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-      Features["atomic-buffer-pk-add-bf16-inst"] = true;
-      Features["atomic-global-pk-add-bf16-inst"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      Features["gfx12-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      Features["image-insts"] = true;
-      Features["fp8-conversion-insts"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      break;
-    case GK_GFX1153:
-    case GK_GFX1152:
-    case GK_GFX1151:
-    case GK_GFX1150:
-    case GK_GFX1103:
-    case GK_GFX1102:
-    case GK_GFX1101:
-    case GK_GFX1100:
-    case GK_GFX11_GENERIC:
-      Features["ci-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dot9-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dot12-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      Features["image-insts"] = true;
-      Features["gws"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      break;
-    case GK_GFX1036:
-    case GK_GFX1035:
-    case GK_GFX1034:
-    case GK_GFX1033:
-    case GK_GFX1032:
-    case GK_GFX1031:
-    case GK_GFX1030:
-    case GK_GFX10_3_GENERIC:
-      Features["ci-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["image-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      Features["gws"] = true;
-      Features["vmem-to-lds-load-insts"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      break;
-    case GK_GFX1012:
-    case GK_GFX1011:
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX1013:
-    case GK_GFX1010:
-    case GK_GFX10_1_GENERIC:
-      Features["dl-insts"] = true;
-      Features["ci-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["image-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      Features["gws"] = true;
-      Features["vmem-to-lds-load-insts"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      break;
-    case GK_GFX950:
-      Features["bitop3-insts"] = true;
-      Features["fp6bf6-cvt-scale-insts"] = true;
-      Features["fp4-cvt-scale-insts"] = true;
-      Features["bf8-cvt-scale-insts"] = true;
-      Features["fp8-cvt-scale-insts"] = true;
-      Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
-      Features["f32-to-f16bf16-cvt-sr-insts"] = true;
-      Features["prng-inst"] = true;
-      Features["permlane16-swap"] = true;
-      Features["permlane32-swap"] = true;
-      Features["ashr-pk-insts"] = true;
-      Features["dot12-insts"] = true;
-      Features["dot13-insts"] = true;
-      Features["atomic-buffer-pk-add-bf16-inst"] = true;
-      Features["gfx950-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX942:
-      Features["fp8-insts"] = true;
-      Features["fp8-conversion-insts"] = true;
-      if (Kind != GK_GFX950)
-        Features["xf32-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX9_4_GENERIC:
-      Features["gfx940-insts"] = true;
-      Features["atomic-ds-pk-add-16-insts"] = true;
-      Features["atomic-flat-pk-add-16-insts"] = true;
-      Features["atomic-global-pk-add-bf16-inst"] = true;
-      Features["gfx90a-insts"] = true;
-      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      Features["dot3-insts"] = true;
-      Features["dot4-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["mai-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx8-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["s-memrealtime"] = true;
-      Features["ci-insts"] = true;
-      Features["s-memtime-inst"] = true;
-      Features["gws"] = true;
-      Features["vmem-to-lds-load-insts"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      Features["wavefrontsize64"] = true;
-      break;
-    case GK_GFX90A:
-      Features["gfx90a-insts"] = true;
-      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      [[fallthrough]];
-    case GK_GFX908:
-      Features["dot3-insts"] = true;
-      Features["dot4-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["mai-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX906:
-      Features["dl-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90C:
-    case GK_GFX909:
-    case GK_GFX904:
-    case GK_GFX902:
-    case GK_GFX900:
-    case GK_GFX9_GENERIC:
-      Features["gfx9-insts"] = true;
-      Features["vmem-to-lds-load-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX810:
-    case GK_GFX805:
-    case GK_GFX803:
-    case GK_GFX802:
-    case GK_GFX801:
-      Features["gfx8-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["s-memrealtime"] = true;
-      Features["ci-insts"] = true;
-      Features["image-insts"] = true;
-      Features["s-memtime-inst"] = true;
-      Features["gws"] = true;
-      Features["wavefrontsize64"] = true;
-      break;
-    case GK_GFX705:
-    case GK_GFX704:
-    case GK_GFX703:
-    case GK_GFX702:
-    case GK_GFX701:
-    case GK_GFX700:
-      Features["ci-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX602:
-    case GK_GFX601:
-    case GK_GFX600:
-      Features["image-insts"] = true;
-      Features["s-memtime-inst"] = true;
-      Features["gws"] = true;
-      Features["atomic-fmin-fmax-global-f32"] = true;
-      Features["atomic-fmin-fmax-global-f64"] = true;
-      Features["wavefrontsize64"] = true;
-      break;
-    case GK_NONE:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
+    StringMap<bool> DefaultFeatures;
+    fillAMDGCNFeatureMap(GPU, T, DefaultFeatures);
+    return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features);
   } else {
     if (GPU.empty())
       GPU = "r600";
@@ -736,39 +777,5 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       llvm_unreachable("Unhandled GPU!");
     }
   }
-}
-
-std::pair<FeatureError, StringRef>
-AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
-                              StringMap<bool> &Features) {
-  StringMap<bool> DefaultFeatures;
-  fillAMDGPUFeatureMap(GPU, T, DefaultFeatures);
-
-  const bool IsNullGPU = GPU.empty();
-  const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32");
-  const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64");
-  const bool HaveWave32 = Features.count("wavefrontsize32");
-  const bool HaveWave64 = Features.count("wavefrontsize64");
-  if (HaveWave32 && HaveWave64) {
-    return {AMDGPU::INVALID_FEATURE_COMBINATION,
-            "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
-  }
-  if (HaveWave32 && !IsNullGPU && TargetHasWave64) {
-    return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
-  }
-  if (HaveWave64 && !IsNullGPU && TargetHasWave32) {
-    return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"};
-  }
-  // Don't assume any wavesize with an unknown subtarget.
-  // Default to wave32 if target supports both.
-  if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
-      !TargetHasWave64)
-    Features.insert(std::make_pair("wavefrontsize32", true));
-
-  for (const auto &Entry : DefaultFeatures) {
-    if (!Features.count(Entry.getKey()))
-      Features[Entry.getKey()] = Entry.getValue();
-  }
-
   return {NO_ERROR, StringRef()};
 }
    
    
More information about the llvm-branch-commits
mailing list