[llvm] e43247d - [Clang][Flang][AMDGPU] Add support for AMDGPU to Flang driver

Wed Mar 29 00:30:20 PDT 2023

Author: Dominik Adamski
Date: 2023-03-29T02:23:37-05:00
New Revision: e43247dd329cabf7eb4dd4323b3422d3774e57a7

URL: https://github.com/llvm/llvm-project/commit/e43247dd329cabf7eb4dd4323b3422d3774e57a7
DIFF: https://github.com/llvm/llvm-project/commit/e43247dd329cabf7eb4dd4323b3422d3774e57a7.diff

LOG: [Clang][Flang][AMDGPU] Add support for AMDGPU to Flang driver

Scope of changes:
  1) Extract common code between Clang and Flang for parsing AMDGPU features
  2) Add function which adds implicit target features for AMDGPU as Clang does
  3) Add AMDGPU target as one of valid targets for Flang

Differential Revision: https://reviews.llvm.org/D145579

Reviewed By: yaxunl, awarzynski

Added: 
    

Modified: 
    clang/lib/Basic/Targets/AMDGPU.cpp
    clang/lib/Driver/ToolChains/CommonArgs.cpp
    clang/lib/Driver/ToolChains/Flang.cpp
    flang/lib/Frontend/FrontendActions.cpp
    flang/test/Driver/target-cpu-features-invalid.f90
    flang/test/Driver/target-cpu-features.f90
    llvm/include/llvm/TargetParser/TargetParser.h
    llvm/lib/TargetParser/TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 9b3a0b0f40edb..5b99755c21e2b 100644

--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -179,197 +179,19 @@ ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
 bool AMDGPUTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeatureVec) const {
-  const bool IsNullCPU = CPU.empty();
-  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
-
-  // XXX - What does the member GPU mean if device name string passed here?
-  if (isAMDGCN(getTriple())) {
-    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
-    case GK_GFX1103:
-    case GK_GFX1102:
-    case GK_GFX1101:
-    case GK_GFX1100:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot8-insts"] = true;
-      Features["dot9-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["gfx11-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      break;
-    case GK_GFX1036:
-    case GK_GFX1035:
-    case GK_GFX1034:
-    case GK_GFX1033:
-    case GK_GFX1032:
-    case GK_GFX1031:
-    case GK_GFX1030:
-      IsWave32Capable = true;
-      Features["ci-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      Features["dl-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["gfx10-3-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX1012:
-    case GK_GFX1011:
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX1013:
-    case GK_GFX1010:
-      IsWave32Capable = true;
-      Features["dl-insts"] = true;
-      Features["ci-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["gfx8-insts"] = true;
-      Features["gfx9-insts"] = true;
-      Features["gfx10-insts"] = true;
-      Features["s-memrealtime"] = true;
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_GFX940:
-      Features["gfx940-insts"] = true;
-      Features["fp8-insts"] = true;
-      Features["atomic-ds-pk-add-16-insts"] = true;
-      Features["atomic-flat-pk-add-16-insts"] = true;
-      Features["atomic-global-pk-add-bf16-inst"] = true;
-      [[fallthrough]];
-    case GK_GFX90A:
-      Features["gfx90a-insts"] = true;
-      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-      Features["atomic-fadd-rtn-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX908:
-      Features["dot3-insts"] = true;
-      Features["dot4-insts"] = true;
-      Features["dot5-insts"] = true;
-      Features["dot6-insts"] = true;
-      Features["mai-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX906:
-      Features["dl-insts"] = true;
-      Features["dot1-insts"] = true;
-      Features["dot2-insts"] = true;
-      Features["dot7-insts"] = true;
-      Features["dot10-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX90C:
-    case GK_GFX909:
-    case GK_GFX904:
-    case GK_GFX902:
-    case GK_GFX900:
-      Features["gfx9-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX810:
-    case GK_GFX805:
-    case GK_GFX803:
-    case GK_GFX802:
-    case GK_GFX801:
-      Features["gfx8-insts"] = true;
-      Features["16-bit-insts"] = true;
-      Features["dpp"] = true;
-      Features["s-memrealtime"] = true;
-      [[fallthrough]];
-    case GK_GFX705:
-    case GK_GFX704:
-    case GK_GFX703:
-    case GK_GFX702:
-    case GK_GFX701:
-    case GK_GFX700:
-      Features["ci-insts"] = true;
-      [[fallthrough]];
-    case GK_GFX602:
-    case GK_GFX601:
-    case GK_GFX600:
-      Features["s-memtime-inst"] = true;
-      break;
-    case GK_NONE:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  } else {
-    if (CPU.empty())
-      CPU = "r600";
-
-    switch (llvm::AMDGPU::parseArchR600(CPU)) {
-    case GK_CAYMAN:
-    case GK_CYPRESS:
-    case GK_RV770:
-    case GK_RV670:
-      // TODO: Add fp64 when implemented.
-      break;
-    case GK_TURKS:
-    case GK_CAICOS:
-    case GK_BARTS:
-    case GK_SUMO:
-    case GK_REDWOOD:
-    case GK_JUNIPER:
-    case GK_CEDAR:
-    case GK_RV730:
-    case GK_RV710:
-    case GK_RS880:
-    case GK_R630:
-    case GK_R600:
-      break;
-    default:
-      llvm_unreachable("Unhandled GPU!");
-    }
-  }
-
+  fillAMDGPUFeatureMap(CPU, getTriple(), Features);
   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
     return false;
 
-  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
-  const bool HaveWave32 =
-      (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
-  const bool HaveWave64 = Features.count("wavefrontsize64");
-
   // TODO: Should move this logic into TargetParser
-  if (HaveWave32 && HaveWave64) {
-    Diags.Report(diag::err_invalid_feature_combination)
-        << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+  std::string ErrorMsg;
+  if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
+    Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
     return false;
   }
 
-  // Don't assume any wavesize with an unknown subtarget.
-  if (!IsNullCPU) {
-    // Default to wave32 if available, or wave64 if not
-    if (!HaveWave32 && !HaveWave64) {
-      StringRef DefaultWaveSizeFeature =
-          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
-      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
-    }
-  }
-
   return true;
 }
 

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 4273373f62f35..7f4d82aae4cc1 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -331,6 +331,9 @@ static std::string getAMDGPUTargetGPU(const llvm::Triple &T,
         .Case("aruba", "cayman")
         .Default(GPUName.str());
   }
+  if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+    return getProcessorFromTargetID(T, A->getValue()).str();
+  }
   return "";
 }
 

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 164238d889b53..cd5782dba34d8 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -107,6 +107,8 @@ void Flang::addTargetOptions(const ArgList &Args,
   switch (TC.getArch()) {
   default:
     break;
+  case llvm::Triple::r600:
+  case llvm::Triple::amdgcn:
   case llvm::Triple::aarch64:
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:

diff  --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 7aeda4a9f3560..8b2055e646510 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/TargetParser.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <memory>
 #include <system_error>
@@ -131,6 +132,60 @@ bool PrescanAndSemaDebugAction::beginSourceFileAction() {
          (generateRtTypeTables() || true);
 }
 
+// Get the feature string which combines the implicit target features
+// for AMD GPU with the target features specified by the user
+static std::string
+getExplicitAndImplicitAMDGPUTargetFeatures(CompilerInstance &ci,
+                                           const TargetOptions &targetOpts,
+                                           const llvm::Triple triple) {
+  llvm::StringRef cpu = targetOpts.cpu;
+  llvm::StringMap<bool> implicitFeaturesMap;
+  std::string errorMsg;
+  // Get the set of implicit target features
+  llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
+
+  // Add target features specified by the user
+  for (auto &userFeature : targetOpts.featuresAsWritten) {
+    std::string userKeyString = userFeature.substr(1);
+    implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
+  }
+
+  if (!llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap,
+                                           errorMsg)) {
+    unsigned diagID = ci.getDiagnostics().getCustomDiagID(
+        clang::DiagnosticsEngine::Error, "Unsupported feature ID: %0");
+    ci.getDiagnostics().Report(diagID) << errorMsg.data();
+    return std::string();
+  }
+
+  llvm::SmallVector<std::string> featuresVec;
+  for (auto &implicitFeatureItem : implicitFeaturesMap) {
+    featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") +
+                           implicitFeatureItem.first().str())
+                              .str());
+  }
+
+  return llvm::join(featuresVec, ",");
+}
+
+// Produces the comma-separated string which represents the target features
+static std::string getTargetFeatures(CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+
+  // Clang does not append all target features to the clang -cc1 invocation.
+  // Some target features are parsed implicitly by clang::TargetInfo child
+  // classes. The clang::TargetInfo classes are basic Clang classes and
+  // they cannot be reused by Flang.
+  // That's why we need to extract implicit target features and add
+  // them to the target features specified by the user.
+  if (triple.isAMDGPU()) {
+    return getExplicitAndImplicitAMDGPUTargetFeatures(ci, targetOpts, triple);
+  }
+  return llvm::join(targetOpts.featuresAsWritten.begin(),
+                    targetOpts.featuresAsWritten.end(), ",");
+}
+
 static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
                               const llvm::DataLayout &dl) {
   mlir::MLIRContext *context = mlirModule.getContext();
@@ -671,8 +726,7 @@ bool CodeGenAction::setUpTargetMachine() {
       llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
   assert(OptLevelOrNone && "Invalid optimization level!");
   llvm::CodeGenOpt::Level OptLevel = *OptLevelOrNone;
-  std::string featuresStr = llvm::join(targetOpts.featuresAsWritten.begin(),
-                                       targetOpts.featuresAsWritten.end(), ",");
+  std::string featuresStr = getTargetFeatures(ci);
   tm.reset(theTarget->createTargetMachine(
       theTriple, /*CPU=*/targetOpts.cpu,
       /*Features=*/featuresStr, llvm::TargetOptions(),

diff  --git a/flang/test/Driver/target-cpu-features-invalid.f90 b/flang/test/Driver/target-cpu-features-invalid.f90
index 7ecbe597637c6..288da8d57e81d 100644
--- a/flang/test/Driver/target-cpu-features-invalid.f90
+++ b/flang/test/Driver/target-cpu-features-invalid.f90
@@ -1,4 +1,4 @@
-! REQUIRES: aarch64-registered-target
+! REQUIRES: aarch64-registered-target, amdgpu-registered-target
 
 ! Test that invalid cpu and features are ignored.
 
@@ -8,6 +8,9 @@
 ! RUN: %flang_fc1 -triple aarch64-linux-gnu -target-feature +superspeed \
 ! RUN:   -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FEATURE
 
+! RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -target-feature +wavefrontsize32 \
+! RUN:   -target-feature +wavefrontsize64 -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-WAVEFRONT
 
 ! CHECK-INVALID-CPU: 'supercpu' is not a recognized processor for this target (ignoring processor)
 ! CHECK-INVALID-FEATURE: '+superspeed' is not a recognized feature for this target (ignoring feature)
+! CHECK-INVALID-WAVEFRONT: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive

diff  --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90
index ca2ed274a6b6e..30fcdca2dba41 100644
--- a/flang/test/Driver/target-cpu-features.f90
+++ b/flang/test/Driver/target-cpu-features.f90
@@ -23,6 +23,11 @@
 ! RUN: %flang --target=riscv64-linux-gnu -c %s -### 2>&1 \
 ! RUN: | FileCheck %s -check-prefix=CHECK-RV64
 
+! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -c %s -### 2>&1 \
+! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU
+
+! RUN: %flang --target=r600-unknown-unknown -mcpu=cayman -c %s -### 2>&1 \
+! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU-R600
 
 ! CHECK-A57: "-fc1" "-triple" "aarch64-unknown-linux-gnu"
 ! CHECK-A57-SAME: "-target-cpu" "cortex-a57" "-target-feature" "+v8a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+fp-armv8" "-target-feature" "+sha2" "-target-feature" "+neon"
@@ -46,3 +51,9 @@
 
 ! CHECK-RV64: "-fc1" "-triple" "riscv64-unknown-linux-gnu"
 ! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c"
+
+! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908"
+
+! CHECK-AMDGPU-R600: "-fc1" "-triple" "r600-unknown-unknown"
+! CHECK-AMDGPU-R600-SAME: "-target-cpu" "cayman"

diff  --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index a6991d26fe447..cf10b9b43615f 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_TARGETPARSER_TARGETPARSER_H
 #define LLVM_TARGETPARSER_TARGETPARSER_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
@@ -149,6 +150,14 @@ void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
 
 IsaVersion getIsaVersion(StringRef GPU);
 
+/// Fills Features map with default values for given target GPU
+void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                          StringMap<bool> &Features);
+
+/// Inserts wave size feature for given GPU into features map
+bool insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                           StringMap<bool> &Features, std::string &ErrorMsg);
+
 } // namespace AMDGPU
 } // namespace llvm
 

diff  --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index e9fccef0433e9..96e64c2092a0d 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -251,3 +251,218 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
 
   return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
 }
+
+void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+                                  StringMap<bool> &Features) {
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (T.isAMDGCN()) {
+    switch (parseArchAMDGCN(GPU)) {
+    case GK_GFX1103:
+    case GK_GFX1102:
+    case GK_GFX1101:
+    case GK_GFX1100:
+      Features["ci-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot8-insts"] = true;
+      Features["dot9-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["gfx11-insts"] = true;
+      Features["atomic-fadd-rtn-insts"] = true;
+      break;
+    case GK_GFX1036:
+    case GK_GFX1035:
+    case GK_GFX1034:
+    case GK_GFX1033:
+    case GK_GFX1032:
+    case GK_GFX1031:
+    case GK_GFX1030:
+      Features["ci-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      Features["dl-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["gfx10-3-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX1012:
+    case GK_GFX1011:
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX1013:
+    case GK_GFX1010:
+      Features["dl-insts"] = true;
+      Features["ci-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["gfx8-insts"] = true;
+      Features["gfx9-insts"] = true;
+      Features["gfx10-insts"] = true;
+      Features["s-memrealtime"] = true;
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_GFX940:
+      Features["gfx940-insts"] = true;
+      Features["fp8-insts"] = true;
+      Features["atomic-ds-pk-add-16-insts"] = true;
+      Features["atomic-flat-pk-add-16-insts"] = true;
+      Features["atomic-global-pk-add-bf16-inst"] = true;
+      [[fallthrough]];
+    case GK_GFX90A:
+      Features["gfx90a-insts"] = true;
+      Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+      Features["atomic-fadd-rtn-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX908:
+      Features["dot3-insts"] = true;
+      Features["dot4-insts"] = true;
+      Features["dot5-insts"] = true;
+      Features["dot6-insts"] = true;
+      Features["mai-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX906:
+      Features["dl-insts"] = true;
+      Features["dot1-insts"] = true;
+      Features["dot2-insts"] = true;
+      Features["dot7-insts"] = true;
+      Features["dot10-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX90C:
+    case GK_GFX909:
+    case GK_GFX904:
+    case GK_GFX902:
+    case GK_GFX900:
+      Features["gfx9-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX810:
+    case GK_GFX805:
+    case GK_GFX803:
+    case GK_GFX802:
+    case GK_GFX801:
+      Features["gfx8-insts"] = true;
+      Features["16-bit-insts"] = true;
+      Features["dpp"] = true;
+      Features["s-memrealtime"] = true;
+      [[fallthrough]];
+    case GK_GFX705:
+    case GK_GFX704:
+    case GK_GFX703:
+    case GK_GFX702:
+    case GK_GFX701:
+    case GK_GFX700:
+      Features["ci-insts"] = true;
+      [[fallthrough]];
+    case GK_GFX602:
+    case GK_GFX601:
+    case GK_GFX600:
+      Features["s-memtime-inst"] = true;
+      break;
+    case GK_NONE:
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  } else {
+    if (GPU.empty())
+      GPU = "r600";
+
+    switch (llvm::AMDGPU::parseArchR600(GPU)) {
+    case GK_CAYMAN:
+    case GK_CYPRESS:
+    case GK_RV770:
+    case GK_RV670:
+      // TODO: Add fp64 when implemented.
+      break;
+    case GK_TURKS:
+    case GK_CAICOS:
+    case GK_BARTS:
+    case GK_SUMO:
+    case GK_REDWOOD:
+    case GK_JUNIPER:
+    case GK_CEDAR:
+    case GK_RV730:
+    case GK_RV710:
+    case GK_RS880:
+    case GK_R630:
+    case GK_R600:
+      break;
+    default:
+      llvm_unreachable("Unhandled GPU!");
+    }
+  }
+}
+
+static bool isWave32Capable(StringRef GPU, const Triple &T) {
+  bool IsWave32Capable = false;
+  // XXX - What does the member GPU mean if device name string passed here?
+  if (T.isAMDGCN()) {
+    switch (parseArchAMDGCN(GPU)) {
+    case GK_GFX1103:
+    case GK_GFX1102:
+    case GK_GFX1101:
+    case GK_GFX1100:
+    case GK_GFX1036:
+    case GK_GFX1035:
+    case GK_GFX1034:
+    case GK_GFX1033:
+    case GK_GFX1032:
+    case GK_GFX1031:
+    case GK_GFX1030:
+    case GK_GFX1012:
+    case GK_GFX1011:
+    case GK_GFX1013:
+    case GK_GFX1010:
+      IsWave32Capable = true;
+      break;
+    default:
+      break;
+    }
+  }
+  return IsWave32Capable;
+}
+
+bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                                   StringMap<bool> &Features,
+                                   std::string &ErrorMsg) {
+  bool IsWave32Capable = isWave32Capable(GPU, T);
+  const bool IsNullGPU = GPU.empty();
+  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+  const bool HaveWave32 =
+      (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
+  const bool HaveWave64 = Features.count("wavefrontsize64");
+  if (HaveWave32 && HaveWave64) {
+    ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+    return false;
+  }
+  // Don't assume any wavesize with an unknown subtarget.
+  if (!IsNullGPU) {
+    // Default to wave32 if available, or wave64 if not
+    if (!HaveWave32 && !HaveWave64) {
+      StringRef DefaultWaveSizeFeature =
+          IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
+      Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+    }
+  }
+  return true;
+}