[llvm] 8e68c12 - [AMDGPU] Remove function with incompatible features

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 21 01:42:45 PST 2023


Author: pvanhout
Date: 2023-02-21T10:42:39+01:00
New Revision: 8e68c1204580d0bb57001929d3345af80b27ac01

URL: https://github.com/llvm/llvm-project/commit/8e68c1204580d0bb57001929d3345af80b27ac01
DIFF: https://github.com/llvm/llvm-project/commit/8e68c1204580d0bb57001929d3345af80b27ac01.diff

LOG: [AMDGPU] Remove function with incompatible features

Adds a new pass that removes functions
if they use features that are not supported on the current GPU.

This change is aimed at preventing crashes when building code at O0 that
uses idioms such as `if (ISA_VERSION >= N) intrinsic_a(); else intrinsic_b();`
where ISA_VERSION is not constexpr, and intrinsic_a is not selectable
on older targets.
This is a pattern that's used all over the ROCm device libs. The main
motive behind this change is to allow code using ROCm device libs
to be built at O0.

Note: the feature checking logic is done ad-hoc in the pass. There is no other
pass that needs (or will need in the foreseeable future) to do similar
feature-checking logic so I did not see a need to generalize the feature
checking logic yet. It can (and should probably) be generalized later and
moved to a TargetInfo-like class or helper file.

Reviewed By: arsenm, Joe_Nash

Differential Revision: https://reviews.llvm.org/D139000

Added: 
    llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
    llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll

Modified: 
    llvm/include/llvm/MC/MCSubtargetInfo.h
    llvm/lib/Target/AMDGPU/AMDGPU.h
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/lib/Target/AMDGPU/CMakeLists.txt
    llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
    llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 26aa8cdfa9374..943486ac149fc 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -230,6 +230,10 @@ class MCSubtargetInfo {
     return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
   }
 
+  ArrayRef<SubtargetSubTypeKV> getAllProcessorDescriptions() const {
+    return ProcDesc;
+  }
+
   virtual unsigned getHwMode() const { return 0; }
 
   /// Return the cache size in bytes for the given level of cache.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 59fe7ae6e3560..a345b9dc043d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,6 +49,7 @@ FunctionPass *createSIFormMemoryClausesPass();
 FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
 FunctionPass *createAMDGPUUseNativeCallsPass();
+ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPULateCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
@@ -288,6 +289,9 @@ extern char &AMDGPUAnnotateUniformValuesPassID;
 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
 extern char &AMDGPUCodeGenPrepareID;
 
+void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
+extern char &AMDGPURemoveIncompatibleFunctionsID;
+
 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
 extern char &AMDGPULateCodeGenPrepareID;
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
new file mode 100644
index 0000000000000..c5ca2ef2b2490
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -0,0 +1,189 @@
+//===-- AMDGPURemoveIncompatibleFunctions.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces every use of a function that requires GPU features
+/// unsupported by the current GPU with null, then deletes that function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "amdgpu-remove-incompatible-functions"
+
+using namespace llvm;
+
+namespace llvm {
+extern const SubtargetFeatureKV
+    AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1];
+}
+
+namespace {
+
+using Generation = AMDGPUSubtarget::Generation;
+
+class AMDGPURemoveIncompatibleFunctions : public ModulePass {
+public:
+  static char ID;
+
+  AMDGPURemoveIncompatibleFunctions(const TargetMachine *TM = nullptr)
+      : ModulePass(ID), TM(TM) {
+    assert(TM && "No TargetMachine!");
+  }
+
+  StringRef getPassName() const override {
+    return "AMDGPU Remove Incompatible Functions";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+
+  /// Checks a single function, returns true if the function must be deleted.
+  bool checkFunction(Function &F);
+
+  bool runOnModule(Module &M) override {
+    assert(TM->getTargetTriple().isAMDGCN());
+
+    SmallVector<Function *, 4> FnsToDelete;
+    for (Function &F : M) {
+      if (checkFunction(F))
+        FnsToDelete.push_back(&F);
+    }
+
+    for (Function *F : FnsToDelete) {
+      F->replaceAllUsesWith(ConstantPointerNull::get(F->getType()));
+      F->eraseFromParent();
+    }
+    return !FnsToDelete.empty();
+  }
+
+private:
+  const TargetMachine *TM = nullptr;
+};
+
+StringRef getFeatureName(unsigned Feature) {
+  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV)
+    if (Feature == KV.Value)
+      return KV.Key;
+
+  llvm_unreachable("Unknown Target feature");
+}
+
+const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST,
+                                     StringRef GPUName) {
+  for (const SubtargetSubTypeKV &KV : ST.getAllProcessorDescriptions())
+    if (StringRef(KV.Key) == GPUName)
+      return &KV;
+
+  return nullptr;
+}
+
+constexpr unsigned FeaturesToCheck[] = {
+    AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts,
+    AMDGPU::FeatureGFX9Insts,  AMDGPU::FeatureGFX8Insts,
+    AMDGPU::FeatureDPP,        AMDGPU::Feature16BitInsts,
+    AMDGPU::FeatureDot1Insts,  AMDGPU::FeatureDot2Insts,
+    AMDGPU::FeatureDot3Insts,  AMDGPU::FeatureDot4Insts,
+    AMDGPU::FeatureDot5Insts,  AMDGPU::FeatureDot6Insts,
+    AMDGPU::FeatureDot7Insts,  AMDGPU::FeatureDot8Insts,
+};
+
+FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) {
+  FeatureBitset Result = Features;
+  for (const SubtargetFeatureKV &FE : AMDGPUFeatureKV) {
+    if (Features.test(FE.Value) && FE.Implies.any())
+      Result |= expandImpliedFeatures(FE.Implies.getAsBitset());
+  }
+  return Result;
+}
+
+static int DK_IncompatibleFn = getNextAvailablePluginDiagnosticKind();
+
+struct DiagnosticInfoRemovingIncompatibleFunction
+    : public DiagnosticInfoWithLocationBase {
+  DiagnosticInfoRemovingIncompatibleFunction(Function &F, Twine M)
+      : DiagnosticInfoWithLocationBase(DiagnosticKind(DK_IncompatibleFn),
+                                       DS_Remark, F, DiagnosticLocation()),
+        Msg(M.str()) {}
+
+  void print(DiagnosticPrinter &DP) const override {
+    DP << getFunction().getName() << ": removing function: " << Msg;
+  }
+
+  static bool classof(const DiagnosticInfo *DI) {
+    return DI->getKind() == DK_IncompatibleFn;
+  }
+
+  std::string Msg;
+};
+
+} // end anonymous namespace
+
+bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  const GCNSubtarget *ST =
+      static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
+
+  // Check the GPU isn't generic. Generic is used for testing only
+  // and we don't want this pass to interfere with it.
+  StringRef GPUName = ST->getCPU();
+  if (GPUName.empty() || GPUName.contains("generic"))
+    return false;
+
+  // Try to fetch the GPU's info. If we can't, it's likely an unknown processor
+  // so just bail out.
+  const SubtargetSubTypeKV *GPUInfo = getGPUInfo(*ST, GPUName);
+  if (!GPUInfo)
+    return false;
+
+  LLVMContext &Ctx = F.getContext();
+
+  // Get all the features implied by the current GPU, and recursively expand
+  // the features that imply other features.
+  //
+  // e.g. GFX90A implies FeatureGFX9, and FeatureGFX9 implies a whole set of
+  // other features.
+  const FeatureBitset GPUFeatureBits =
+      expandImpliedFeatures(GPUInfo->Implies.getAsBitset());
+
+  // Now that we have a FeatureBitset containing all possible features for
+  // the chosen GPU, check our list of "suspicious" features.
+
+  // Check that the user didn't enable any features that aren't part of that
+  // GPU's feature set. We only check a predetermined set of features.
+  for (unsigned Feature : FeaturesToCheck) {
+    if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) {
+      DiagnosticInfoRemovingIncompatibleFunction DiagInfo(
+          F, "+" + getFeatureName(Feature) +
+                 " is not supported on the current target");
+      Ctx.diagnose(DiagInfo);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+INITIALIZE_PASS(AMDGPURemoveIncompatibleFunctions, DEBUG_TYPE,
+                "AMDGPU Remove Incompatible Functions", false, false)
+
+char AMDGPURemoveIncompatibleFunctions::ID = 0;
+
+ModulePass *
+llvm::createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *TM) {
+  return new AMDGPURemoveIncompatibleFunctions(TM);
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f9cd37ec94585..64dc8604e76ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -216,6 +216,12 @@ static cl::opt<bool> EarlyInlineAll(
   cl::init(false),
   cl::Hidden);
 
+static cl::opt<bool> RemoveIncompatibleFunctions(
+    "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
+    cl::desc("Enable removal of functions when they "
+             "use features not supported by the target GPU"),
+    cl::init(true));
+
 static cl::opt<bool> EnableSDWAPeephole(
   "amdgpu-sdwa-peephole",
   cl::desc("Enable SDWA peepholer"),
@@ -380,6 +386,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPULateCodeGenPreparePass(*PR);
   initializeAMDGPUPropagateAttributesEarlyPass(*PR);
   initializeAMDGPUPropagateAttributesLatePass(*PR);
+  initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
   initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
   initializeAMDGPULowerModuleLDSPass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
@@ -1040,6 +1047,9 @@ void AMDGPUPassConfig::addIRPasses() {
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
   if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
+    if (RemoveIncompatibleFunctions)
+      addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
+
     addPass(createAMDGPUAttributorPass());
 
     // FIXME: This pass adds 2 hacky attributes that can be replaced with an

diff  --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 1d04ce0266be1..99e43ac2a1183 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -90,6 +90,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPURegBankSelect.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPUReleaseVGPRs.cpp
+  AMDGPURemoveIncompatibleFunctions.cpp
   AMDGPUReplaceLDSUseWithPointer.cpp
   AMDGPUResourceUsageAnalysis.cpp
   AMDGPURewriteOutArguments.cpp

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
index 5dfde116785db..9eeb633f0a817 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s
+; RUN: llc -global-isel -amdgpu-enable-remove-incompatible-functions=0 -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s
 
 ; Make sure legalizer info doesn't assert on dummy targets
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index ae39e8f0bc5e7..c90ec4332642a 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -50,6 +50,7 @@
 ; GCN-O0-NEXT:      Expand vector predication intrinsics
 ; GCN-O0-NEXT:      Scalarize Masked Memory Intrinsics
 ; GCN-O0-NEXT:      Expand reduction intrinsics
+; GCN-O0-NEXT:    AMDGPU Remove Incompatible Functions
 ; GCN-O0-NEXT:    AMDGPU Attributor
 ; GCN-O0-NEXT:      FunctionPass Manager
 ; GCN-O0-NEXT:        Cycle Info Analysis
@@ -231,6 +232,7 @@
 ; GCN-O1-NEXT:      Expand reduction intrinsics
 ; GCN-O1-NEXT:      Natural Loop Information
 ; GCN-O1-NEXT:      TLS Variable Hoist
+; GCN-O1-NEXT:    AMDGPU Remove Incompatible Functions
 ; GCN-O1-NEXT:    AMDGPU Attributor
 ; GCN-O1-NEXT:      FunctionPass Manager
 ; GCN-O1-NEXT:        Cycle Info Analysis
@@ -522,6 +524,7 @@
 ; GCN-O1-OPTS-NEXT:      Natural Loop Information
 ; GCN-O1-OPTS-NEXT:      TLS Variable Hoist
 ; GCN-O1-OPTS-NEXT:      Early CSE
+; GCN-O1-OPTS-NEXT:    AMDGPU Remove Incompatible Functions
 ; GCN-O1-OPTS-NEXT:    AMDGPU Attributor
 ; GCN-O1-OPTS-NEXT:      FunctionPass Manager
 ; GCN-O1-OPTS-NEXT:        Cycle Info Analysis
@@ -827,6 +830,7 @@
 ; GCN-O2-NEXT:      Natural Loop Information
 ; GCN-O2-NEXT:      TLS Variable Hoist
 ; GCN-O2-NEXT:      Early CSE
+; GCN-O2-NEXT:    AMDGPU Remove Incompatible Functions
 ; GCN-O2-NEXT:    AMDGPU Attributor
 ; GCN-O2-NEXT:      FunctionPass Manager
 ; GCN-O2-NEXT:        Cycle Info Analysis
@@ -1145,6 +1149,7 @@
 ; GCN-O3-NEXT:      Lazy Block Frequency Analysis
 ; GCN-O3-NEXT:      Optimization Remark Emitter
 ; GCN-O3-NEXT:      Global Value Numbering
+; GCN-O3-NEXT:    AMDGPU Remove Incompatible Functions
 ; GCN-O3-NEXT:    AMDGPU Attributor
 ; GCN-O3-NEXT:      FunctionPass Manager
 ; GCN-O3-NEXT:        Cycle Info Analysis

diff  --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll
new file mode 100644
index 0000000000000..56fb98784392c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll
@@ -0,0 +1,455 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=bonaire -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX7,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX7 %s < %t
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX8,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX8 %s < %t
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx906 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX906,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX906 %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx90a -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX90A,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX90A %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX10,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX10 %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX11,IR %s
+; RUN: FileCheck --check-prefix=WARN-GFX11 %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s
+
+; Note: This test checks the IR, but also has a run line to codegen the file just to check we
+; do not crash when trying to select those functions.
+
+; WARN-GFX7: needs_dpp: removing function: +dpp is not supported on the current target
+; WARN-GFX7: needs_16bit_insts: removing function: +16-bit-insts is not supported on the current target
+; WARN-GFX7: needs_gfx8_insts: removing function: +gfx8-insts is not supported on the current target
+; WARN-GFX7: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target
+; WARN-GFX7: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target
+; WARN-GFX7: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target
+; WARN-GFX7: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target
+; WARN-GFX7: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target
+; WARN-GFX7: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target
+; WARN-GFX7: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target
+; WARN-GFX7: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target
+; WARN-GFX7: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target
+; WARN-GFX7: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target
+; WARN-GFX7: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target
+; WARN-GFX7-NOT: not supported
+
+; WARN-GFX8: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target
+; WARN-GFX8: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target
+; WARN-GFX8: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target
+; WARN-GFX8: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target
+; WARN-GFX8: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target
+; WARN-GFX8: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target
+; WARN-GFX8: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target
+; WARN-GFX8: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target
+; WARN-GFX8: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target
+; WARN-GFX8: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target
+; WARN-GFX8: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target
+; WARN-GFX8-NOT: not supported
+
+; WARN-GFX906: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target
+; WARN-GFX906: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target
+; WARN-GFX906: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target
+; WARN-GFX906: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target
+; WARN-GFX906: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target
+; WARN-GFX906: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target
+; WARN-GFX906: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target
+; WARN-GFX906-NOT: not supported
+
+; WARN-GFX90A: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target
+; WARN-GFX90A: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target
+; WARN-GFX90A: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target
+; WARN-GFX90A-NOT: not supported
+
+; WARN-GFX10: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target
+; WARN-GFX10: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target
+; WARN-GFX10: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target
+; WARN-GFX10: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target
+; WARN-GFX10-NOT: not supported
+
+; WARN-GFX11: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target
+; WARN-GFX11: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target
+; WARN-GFX11: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target
+; WARN-GFX11: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target
+; WARN-GFX11: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target
+; WARN-GFX11-NOT: not supported
+
+; GFX7:   @GVRefs {{.*}} zeroinitializer
+; GFX8:   @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null] 
+; GFX906: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot7_insts, ptr null]
+; GFX90A: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr @needs_dot3_insts, ptr @needs_dot4_insts, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null]
+; GFX10:  @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr @needs_gfx10_insts, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null]
+; GFX11:  @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr @needs_gfx10_insts, ptr @needs_gfx11_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot5_insts, ptr null, ptr @needs_dot7_insts, ptr @needs_dot8_insts]
+@GVRefs = internal global [14 x ptr] [
+  ptr @needs_dpp,
+  ptr @needs_16bit_insts,
+  ptr @needs_gfx8_insts,
+  ptr @needs_gfx9_insts,
+  ptr @needs_gfx10_insts,
+  ptr @needs_gfx11_insts,
+  ptr @needs_dot1_insts,
+  ptr @needs_dot2_insts,
+  ptr @needs_dot3_insts,
+  ptr @needs_dot4_insts,
+  ptr @needs_dot5_insts,
+  ptr @needs_dot6_insts,
+  ptr @needs_dot7_insts,
+  ptr @needs_dot8_insts
+]
+
+; GFX7: @ConstantExpr = internal global i64 0
+@ConstantExpr = internal global i64 ptrtoint (ptr @needs_dpp to i64)
+
+define void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #0 {
+; GFX7-NOT:   define void @needs_dpp(
+; GFX8:       define void @needs_dpp(
+; GFX9:       define void @needs_dpp(
+; GFX10:      define void @needs_dpp(
+; GFX11:      define void @needs_dpp(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #1 {
+; GFX7-NOT:   define void @needs_16bit_insts(
+; GFX8:       define void @needs_16bit_insts(
+; GFX9:       define void @needs_16bit_insts(
+; GFX10:      define void @needs_16bit_insts(
+; GFX11:      define void @needs_16bit_insts(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #2 {
+; GFX7-NOT:   define void @needs_gfx8_insts(
+; GFX8:       define void @needs_gfx8_insts(
+; GFX9:       define void @needs_gfx8_insts(
+; GFX10:      define void @needs_gfx8_insts(
+; GFX11:      define void @needs_gfx8_insts(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #3 {
+; GFX7-NOT:   define void @needs_gfx9_insts(
+; GFX8-NOT:   define void @needs_gfx9_insts(
+; GFX9:       define void @needs_gfx9_insts(
+; GFX10:      define void @needs_gfx9_insts(
+; GFX11:      define void @needs_gfx9_insts(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #4 {
+; GFX7-NOT:   define void @needs_gfx10_insts(
+; GFX8-NOT:   define void @needs_gfx10_insts(
+; GFX9-NOT:   define void @needs_gfx10_insts(
+; GFX10:      define void @needs_gfx10_insts(
+; GFX11:      define void @needs_gfx10_insts(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #5 {
+; GFX7-NOT:   define void @needs_gfx11_insts(
+; GFX8-NOT:   define void @needs_gfx11_insts(
+; GFX9-NOT:   define void @needs_gfx11_insts(
+; GFX10-NOT:  define void @needs_gfx11_insts(
+; GFX11:      define void @needs_gfx11_insts(
+entry:
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %ld = load i64, ptr %in
+  br label %endif
+
+else:
+  %add = add i64 %a, %b
+  br label %endif
+
+endif:
+  %phi = phi i64 [%ld, %if], [%add, %else]
+  store i64 %phi, ptr %out
+  ret void
+}
+
+define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #6 {
+; GFX7-NOT:     define void @needs_dot1_insts(
+; GFX8-NOT:     define void @needs_dot1_insts(
+; GFX9:         define void @needs_dot1_insts(
+; GFX10:        define void @needs_dot1_insts(
+; GFX11-NOT:    define void @needs_dot1_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #7 {
+; GFX7-NOT:     define void @needs_dot2_insts(
+; GFX8-NOT:     define void @needs_dot2_insts(
+; GFX9:         define void @needs_dot2_insts(
+; GFX10:        define void @needs_dot2_insts(
+; GFX11-NOT:    define void @needs_dot2_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #8 {
+; GFX7-NOT:   define void @needs_dot3_insts(
+; GFX8-NOT:   define void @needs_dot3_insts(
+; GFX906-NOT: define void @needs_dot3_insts(
+; GFX90A:     define void @needs_dot3_insts(
+; GFX10-NOT:  define void @needs_dot3_insts(
+; GFX11-NOT:  define void @needs_dot3_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+
+define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #9 {
+; GFX7-NOT:   define void @needs_dot4_insts(
+; GFX8-NOT:   define void @needs_dot4_insts(
+; GFX906-NOT: define void @needs_dot4_insts(
+; GFX90A:     define void @needs_dot4_insts(
+; GFX10-NOT:  define void @needs_dot4_insts(
+; GFX11-NOT:  define void @needs_dot4_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #10 {
+; GFX7-NOT:   define void @needs_dot5_insts(
+; GFX8-NOT:   define void @needs_dot5_insts(
+; GFX906-NOT: define void @needs_dot5_insts(
+; GFX90A:     define void @needs_dot5_insts(
+; GFX10:      define void @needs_dot5_insts(
+; GFX11:      define void @needs_dot5_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #11 {
+; GFX7-NOT:   define void @needs_dot6_insts(
+; GFX8-NOT:   define void @needs_dot6_insts(
+; GFX906-NOT: define void @needs_dot6_insts(
+; GFX90A:     define void @needs_dot6_insts(
+; GFX10:      define void @needs_dot6_insts(
+; GFX11-NOT:  define void @needs_dot6_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #12 {
+; GFX7-NOT:   define void @needs_dot7_insts(
+; GFX8-NOT:   define void @needs_dot7_insts(
+; GFX9:       define void @needs_dot7_insts(
+; GFX10:      define void @needs_dot7_insts(
+; GFX11:      define void @needs_dot7_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+define void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #13 {
+; GFX7-NOT:   define void @needs_dot8_insts(
+; GFX8-NOT:   define void @needs_dot8_insts(
+; GFX9-NOT:   define void @needs_dot8_insts(
+; GFX10-NOT:  define void @needs_dot8_insts(
+; GFX11:      define void @needs_dot8_insts(
+  %add = add i64 %a, %b
+  store i64 %add, ptr %out
+  ret void
+}
+
+; IR: define void @caller(
+define void @caller(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) {
+  ; GFX7: call void null(
+  ; GFX8: call void @needs_dpp(
+  ; GFX9: call void @needs_dpp(
+  ; GFX10: call void @needs_dpp(
+  ; GFX11: call void @needs_dpp(
+  call void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void @needs_16bit_insts(
+  ; GFX9: call void @needs_16bit_insts(
+  ; GFX10: call void @needs_16bit_insts(
+  ; GFX11: call void @needs_16bit_insts(
+  call void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void @needs_gfx8_insts(
+  ; GFX9: call void @needs_gfx8_insts(
+  ; GFX10: call void @needs_gfx8_insts(
+  ; GFX11: call void @needs_gfx8_insts(
+  call void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void @needs_gfx9_insts(
+  ; GFX10: call void @needs_gfx9_insts(
+  ; GFX11: call void @needs_gfx9_insts(
+  call void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void null(
+  ; GFX10: call void @needs_gfx10_insts(
+  ; GFX11: call void @needs_gfx10_insts(
+  call void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void null(
+  ; GFX10: call void null(
+  ; GFX11: call void @needs_gfx11_insts(
+  call void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void @needs_dot1_insts(
+  ; GFX10: call void @needs_dot1_insts(
+  ; GFX11: call void null(
+  call void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void @needs_dot2_insts(
+  ; GFX10: call void @needs_dot2_insts(
+  ; GFX11: call void null(
+  call void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX906: call void null(
+  ; GFX90A: call void @needs_dot3_insts(
+  ; GFX10: call void null(
+  ; GFX11: call void null(
+  call void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX906: call void null(
+  ; GFX90A: call void @needs_dot4_insts(
+  ; GFX10: call void null(
+  ; GFX11: call void null(
+  call void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX906: call void null(
+  ; GFX90A: call void @needs_dot5_insts(
+  ; GFX10: call void @needs_dot5_insts(
+  ; GFX11: call void @needs_dot5_insts(
+  call void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX906: call void null(
+  ; GFX90A: call void @needs_dot6_insts(
+  ; GFX10: call void @needs_dot6_insts(
+  ; GFX11: call void null(
+  call void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void @needs_dot7_insts(
+  ; GFX10: call void @needs_dot7_insts(
+  ; GFX11: call void @needs_dot7_insts(
+  call void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; GFX7: call void null(
+  ; GFX8: call void null(
+  ; GFX9: call void null(
+  ; GFX10: call void null(
+  ; GFX11: call void @needs_dot8_insts(
+  call void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c)
+  ; IR: ret void
+  ret void
+}
+
+attributes #0 = { "target-features"="+dpp" }
+attributes #1 = { "target-features"="+16-bit-insts" }
+attributes #2 = { "target-features"="+gfx8-insts" }
+attributes #3 = { "target-features"="+gfx9-insts" }
+attributes #4 = { "target-features"="+gfx10-insts" }
+attributes #5 = { "target-features"="+gfx11-insts" }
+attributes #6 = { "target-features"="+dot1-insts" }
+attributes #7 = { "target-features"="+dot2-insts" }
+attributes #8 = { "target-features"="+dot3-insts" }
+attributes #9 = { "target-features"="+dot4-insts" }
+attributes #10 = { "target-features"="+dot5-insts" }
+attributes #11 = { "target-features"="+dot6-insts" }
+attributes #12 = { "target-features"="+dot7-insts" }
+attributes #13 = { "target-features"="+dot8-insts" }


        


More information about the llvm-commits mailing list