[llvm] IR: Add verifier plugins for intrinsic verification (PR #159415)

Nicolai Hähnle via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 17 11:02:48 PDT 2025


https://github.com/nhaehnle created https://github.com/llvm/llvm-project/pull/159415

Move target-specific logic into target code.

The main motivation is that target intrinsics can be very complex. Verifying them properly can benefit from using target-specific infrastructure that is not available in the core library that contains the verifier. Verifying target intrinsics via a "plugin" solves this issue.

This does mean that full target-specific verification only happens when the target in question was compiled and initialized. One slightly unfortunate side effect is that llvm-as needs to link against targets in order to fully verify the parsed IR assembly. This shouldn't be a real problem due to dynamic linking, so it seems like a reasonable compromise.

I considered the alternative of adding a hook into TargetTransformInfo, as that is how a similar refactoring was done for InstCombine. However, the verifier is invoked in many places e.g. via llvm::verifyModule where TargetTransformInfo may not be readily available.

>From 168ea1ad8ffa7b9c286adbb153d2276ab2d16ad6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle at amd.com>
Date: Thu, 28 Aug 2025 10:31:22 -0700
Subject: [PATCH] IR: Add verifier plugins for intrinsic verification

Move target-specific logic into target code.

The main motivation is that target intrinsics can be very complex.
Verifying them properly can benefit from using target-specific
infrastructure that is not available in the core library that
contains the verifier. Verifying target intrinsics via a "plugin" solves
this issue.

This does mean that full target-specific verification only happens when
the target in question was compiled and initialized. One slightly
unfortunate side effect is that llvm-as needs to link against targets in
order to fully verify the parsed IR assembly. This shouldn't be a real
problem due to dynamic linking, so it seems like a reasonable compromise.

I considered the alternative of adding a hook into TargetTransformInfo,
as that is how a similar refactoring was done for InstCombine. However,
the verifier is invoked in many places e.g. via llvm::verifyModule
where TargetTransformInfo may not be readily available.
---
 llvm/include/llvm/IR/Verifier.h               | 119 +++-
 llvm/lib/IR/Verifier.cpp                      | 664 +++++++-----------
 llvm/lib/Target/AArch64/AArch64.h             |   2 +
 .../Target/AArch64/AArch64TargetMachine.cpp   |   1 +
 llvm/lib/Target/AArch64/AArch64Verifier.cpp   |  70 ++
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 +
 llvm/lib/Target/AMDGPU/AMDGPU.h               |   2 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   2 +
 llvm/lib/Target/AMDGPU/AMDGPUVerifier.cpp     | 266 +++++++
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 +
 llvm/lib/Target/ARM/ARM.h                     |   2 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp      |   2 +
 llvm/lib/Target/ARM/ARMVerifier.cpp           |  58 ++
 llvm/lib/Target/ARM/CMakeLists.txt            |   1 +
 llvm/lib/Target/NVPTX/CMakeLists.txt          |   1 +
 llvm/lib/Target/NVPTX/NVPTX.h                 |   2 +
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  |   2 +
 llvm/lib/Target/NVPTX/NVVMVerifier.cpp        |  51 ++
 llvm/tools/llvm-as/CMakeLists.txt             |   2 +
 llvm/tools/llvm-as/llvm-as.cpp                |   2 +
 20 files changed, 851 insertions(+), 400 deletions(-)
 create mode 100644 llvm/lib/Target/AArch64/AArch64Verifier.cpp
 create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUVerifier.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMVerifier.cpp
 create mode 100644 llvm/lib/Target/NVPTX/NVVMVerifier.cpp

diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index 8dbb9c8a41d7e..135c6ab4ebb1f 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -21,20 +21,135 @@
 #define LLVM_IR_VERIFIER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/DebugProgramInstruction.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Printable.h"
 #include <utility>
 
 namespace llvm {
 
 class APInt;
+class Attribute;
+class AttributeList;
+class AttributeSet;
+class CallBase;
+class Comdat;
+class DataLayout;
 class Function;
 class FunctionPass;
 class Instruction;
-class MDNode;
+class LLVMContext;
 class Module;
+class Triple;
+class VerifierSupport;
 class raw_ostream;
-struct VerifierSupport;
+
+/// Base class for IR verifier plugins.
+///
+/// To add a plugin, derive from this class and then instantiate it once.
+class VerifierPlugin {
+public:
+  VerifierPlugin();
+  virtual ~VerifierPlugin();
+
+  /// Called when the verifier finds a call (or invoke) to an intrinsic it
+  /// doesn't understand.
+  ///
+  /// If the plugin recognizes the intrinsic, it should report any verifier
+  /// errors via the given helper object.
+  virtual void verifyIntrinsicCall(CallBase &Call, VerifierSupport &VS) const;
+};
+
+class VerifierSupport {
+public:
+  raw_ostream *OS;
+  const Module &M;
+  ModuleSlotTracker MST;
+  const Triple &TT;
+  const DataLayout &DL;
+  LLVMContext &Context;
+
+  /// Track the brokenness of the module while recursively visiting.
+  bool Broken = false;
+  /// Broken debug info can be "recovered" from by stripping the debug info.
+  bool BrokenDebugInfo = false;
+  /// Whether to treat broken debug info as an error.
+  bool TreatBrokenDebugInfoAsError = true;
+
+  explicit VerifierSupport(raw_ostream *OS, const Module &M);
+
+private:
+  LLVM_ABI void Write(const Module *M);
+  LLVM_ABI void Write(const Value *V);
+  LLVM_ABI void Write(const Value &V);
+  LLVM_ABI void Write(const DbgRecord *DR);
+  LLVM_ABI void Write(DbgVariableRecord::LocationType Type);
+  LLVM_ABI void Write(const Metadata *MD);
+
+  template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) {
+    Write(MD.get());
+  }
+
+  LLVM_ABI void Write(const NamedMDNode *NMD);
+  LLVM_ABI void Write(Type *T);
+  LLVM_ABI void Write(const Comdat *C);
+  LLVM_ABI void Write(const APInt *AI);
+  LLVM_ABI void Write(const unsigned i) { *OS << i << '\n'; }
+
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  LLVM_ABI void Write(const Attribute *A);
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  LLVM_ABI void Write(const AttributeSet *AS);
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  LLVM_ABI void Write(const AttributeList *AL);
+  LLVM_ABI void Write(Printable P) { *OS << P << '\n'; }
+
+  template <typename T> void Write(ArrayRef<T> Vs) {
+    for (const T &V : Vs)
+      Write(V);
+  }
+
+  template <typename T1, typename... Ts>
+  void WriteTs(const T1 &V1, const Ts &...Vs) {
+    Write(V1);
+    WriteTs(Vs...);
+  }
+
+  template <typename... Ts> void WriteTs() {}
+
+public:
+  /// A check failed, so print out the condition and the message.
+  ///
+  /// This provides a nice place to put a breakpoint if you want to see why
+  /// something is not correct.
+  LLVM_ABI void CheckFailed(const Twine &Message);
+
+  /// A check failed (with values to print).
+  ///
+  /// This calls the Message-only version so that the above is easier to set a
+  /// breakpoint on.
+  template <typename T1, typename... Ts>
+  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
+    CheckFailed(Message);
+    if (OS)
+      WriteTs(V1, Vs...);
+  }
+
+  /// A debug info check failed.
+  LLVM_ABI void DebugInfoCheckFailed(const Twine &Message);
+
+  /// A debug info check failed (with values to print).
+  template <typename T1, typename... Ts>
+  void DebugInfoCheckFailed(const Twine &Message, const T1 &V1,
+                            const Ts &...Vs) {
+    DebugInfoCheckFailed(Message);
+    if (OS)
+      WriteTs(V1, Vs...);
+  }
+};
 
 /// Verify that the TBAA Metadatas are valid.
 class TBAAVerifier {
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c06b60fd2d9a9..0b393f39e75ff 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -119,6 +119,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ModRef.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -127,6 +128,7 @@
 #include <memory>
 #include <optional>
 #include <string>
+#include <thread>
 #include <utility>
 
 using namespace llvm;
@@ -136,194 +138,307 @@ static cl::opt<bool> VerifyNoAliasScopeDomination(
     cl::desc("Ensure that llvm.experimental.noalias.scope.decl for identical "
              "scopes are not dominating"));
 
-namespace llvm {
+namespace {
 
-struct VerifierSupport {
-  raw_ostream *OS;
-  const Module &M;
-  ModuleSlotTracker MST;
-  const Triple &TT;
-  const DataLayout &DL;
-  LLVMContext &Context;
+class PluginRegistryLock;
+class PluginRegistryReader;
 
-  /// Track the brokenness of the module while recursively visiting.
-  bool Broken = false;
-  /// Broken debug info can be "recovered" from by stripping the debug info.
-  bool BrokenDebugInfo = false;
-  /// Whether to treat broken debug info as an error.
-  bool TreatBrokenDebugInfoAsError = true;
+/// Registry for verifier plugins.
+///
+/// The registry satisfies the following implementation constraints:
+///
+///  * Support dynamically loading and unloading plugins from a thread (e.g.
+///    from dlopen()/dlclose()) while another thread may be in the verifier
+///  * Fast path for iterating over plugins that is lock-free and avoids
+///    cache-line ping pong
+///  * Plugin teardown may happen due to report_fatal_error from a thread that
+///    is currently in the verifier
+///
+/// The implementation achieves this by registering a "reader" object while the
+/// verifier is active. The reader object holds a hazard pointer to the plugins
+/// list that it is currently using.
+class PluginRegistry {
+  friend PluginRegistryReader;
+  friend PluginRegistryLock;
 
-  explicit VerifierSupport(raw_ostream *OS, const Module &M)
-      : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()),
-        Context(M.getContext()) {}
+  using List = SmallVector<const VerifierPlugin *>;
 
-private:
-  void Write(const Module *M) {
-    *OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
-  }
+  sys::Mutex Mutex;
+  SmallVector<PluginRegistryReader *> Readers;
+  List PluginsStorage[2];
+  std::atomic<List *> Plugins;
 
-  void Write(const Value *V) {
-    if (V)
-      Write(*V);
+  PluginRegistry() {
+    Plugins.store(&PluginsStorage[0], std::memory_order_relaxed);
   }
 
-  void Write(const Value &V) {
-    if (isa<Instruction>(V)) {
-      V.print(*OS, MST);
-      *OS << '\n';
-    } else {
-      V.printAsOperand(*OS, true, MST);
-      *OS << '\n';
-    }
-  }
+  template <typename FnT> void updatePlugins(FnT &&F);
 
-  void Write(const DbgRecord *DR) {
-    if (DR) {
-      DR->print(*OS, MST, false);
-      *OS << '\n';
-    }
+public:
+  static PluginRegistry &get() {
+    static PluginRegistry R;
+    return R;
   }
 
-  void Write(DbgVariableRecord::LocationType Type) {
-    switch (Type) {
-    case DbgVariableRecord::LocationType::Value:
-      *OS << "value";
-      break;
-    case DbgVariableRecord::LocationType::Declare:
-      *OS << "declare";
-      break;
-    case DbgVariableRecord::LocationType::Assign:
-      *OS << "assign";
-      break;
-    case DbgVariableRecord::LocationType::End:
-      *OS << "end";
-      break;
-    case DbgVariableRecord::LocationType::Any:
-      *OS << "any";
-      break;
-    };
+  void addPlugin(const VerifierPlugin *P) {
+    updatePlugins([&](List &Plugins) { Plugins.push_back(P); });
   }
 
-  void Write(const Metadata *MD) {
-    if (!MD)
-      return;
-    MD->print(*OS, MST, &M);
-    *OS << '\n';
+  void removePlugin(const VerifierPlugin *P) {
+    updatePlugins([&](List &Plugins) {
+      Plugins.erase(std::remove(Plugins.begin(), Plugins.end(), P),
+                    Plugins.end());
+    });
   }
 
-  template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) {
-    Write(MD.get());
+  void addReader(PluginRegistryReader *R) {
+    if (llvm_is_multithreaded()) {
+      sys::ScopedLock Lock(Mutex);
+      Readers.push_back(R);
+    }
   }
 
-  void Write(const NamedMDNode *NMD) {
-    if (!NMD)
-      return;
-    NMD->print(*OS, MST);
-    *OS << '\n';
+  void removeReader(PluginRegistryReader *R) {
+    if (llvm_is_multithreaded()) {
+      sys::ScopedLock Lock(Mutex);
+      Readers.erase(std::remove(Readers.begin(), Readers.end(), R),
+                    Readers.end());
+    }
   }
+};
 
-  void Write(Type *T) {
-    if (!T)
-      return;
-    *OS << ' ' << *T;
-  }
+class PluginRegistryReader {
+  friend PluginRegistry;
+  friend PluginRegistryLock;
 
-  void Write(const Comdat *C) {
-    if (!C)
-      return;
-    *OS << *C;
-  }
+  std::atomic<PluginRegistry::List *> HazardPtr;
 
-  void Write(const APInt *AI) {
-    if (!AI)
-      return;
-    *OS << *AI << '\n';
+public:
+  PluginRegistryReader() {
+    HazardPtr.store(nullptr, std::memory_order_relaxed);
+    PluginRegistry::get().addReader(this);
   }
 
-  void Write(const unsigned i) { *OS << i << '\n'; }
+  ~PluginRegistryReader() { PluginRegistry::get().removeReader(this); }
+};
 
-  // NOLINTNEXTLINE(readability-identifier-naming)
-  void Write(const Attribute *A) {
-    if (!A)
-      return;
-    *OS << A->getAsString() << '\n';
-  }
+// Thread-safe update of the plugins list. Take the lock, copy & update the
+// list, then wait for all readers to let go of the old version of the list
+// before releasing the lock.
+template <typename FnT> void PluginRegistry::updatePlugins(FnT &&F) {
+  if (llvm_is_multithreaded()) {
+    sys::ScopedLock Lock(Mutex);
+
+    List *OldList = Plugins.load(std::memory_order_relaxed);
+    List *NewList = (OldList == &PluginsStorage[0]) ? &PluginsStorage[1]
+                                                    : &PluginsStorage[0];
+
+    // We're about to write to NewList. Spin wait to ensure no reader is
+    // accessing it.
+    for (auto *R : Readers) {
+      while (R->HazardPtr.load(std::memory_order_seq_cst) == NewList) {
+        // Let's yield to avoid a pathological busy wait. This really should
+        // only happen in the corner case where multiple users of LLVM exist
+        // in the same process and are initialized or torn down concurrently,
+        // so don't sweat the details.
+        std::this_thread::yield();
+      }
+    }
 
-  // NOLINTNEXTLINE(readability-identifier-naming)
-  void Write(const AttributeSet *AS) {
-    if (!AS)
-      return;
-    *OS << AS->getAsString() << '\n';
+    *NewList = *OldList;
+    F(*NewList);
+
+    Plugins.store(NewList, std::memory_order_seq_cst);
+  } else {
+    // Avoid unnecessary copies when compiling without multi-threading
+    // support.
+    F(*Plugins.load(std::memory_order_relaxed));
   }
+}
 
-  // NOLINTNEXTLINE(readability-identifier-naming)
-  void Write(const AttributeList *AL) {
-    if (!AL)
-      return;
-    AL->print(*OS);
+class PluginRegistryLock {
+  PluginRegistryLock(PluginRegistryLock &) = delete;
+  PluginRegistryLock(PluginRegistryLock &&) = delete;
+  PluginRegistryLock &operator=(PluginRegistryLock &) = delete;
+  PluginRegistryLock &operator=(PluginRegistryLock &&) = delete;
+
+  PluginRegistryReader &Reader;
+
+public:
+  explicit PluginRegistryLock(PluginRegistryReader &Reader) : Reader(Reader) {
+    assert(!Reader.HazardPtr &&
+           "cannot have multiple PluginRegistryLocks through the same reader");
+
+    auto &Registry = PluginRegistry::get();
+
+    // The memory order of the initial load is irrelevant since we re-check the
+    // pointer using a sequentially consistent load later.
+    PluginRegistry::List *L = Registry.Plugins.load(std::memory_order_relaxed);
+
+    if (llvm_is_multithreaded()) {
+      for (;;) {
+        Reader.HazardPtr.store(L, std::memory_order_seq_cst);
+
+        PluginRegistry::List *Check =
+            Registry.Plugins.load(std::memory_order_seq_cst);
+        if (Check == L)
+          break;
+
+        L = Check;
+      }
+    } else {
+      Reader.HazardPtr.store(L, std::memory_order_relaxed);
+    }
   }
 
-  void Write(Printable P) { *OS << P << '\n'; }
+  ~PluginRegistryLock() {
+    assert(Reader.HazardPtr);
 
-  template <typename T> void Write(ArrayRef<T> Vs) {
-    for (const T &V : Vs)
-      Write(V);
+    if (llvm_is_multithreaded()) {
+      Reader.HazardPtr.store(nullptr, std::memory_order_seq_cst);
+    } else {
+      Reader.HazardPtr.store(nullptr, std::memory_order_relaxed);
+    }
   }
 
-  template <typename T1, typename... Ts>
-  void WriteTs(const T1 &V1, const Ts &... Vs) {
-    Write(V1);
-    WriteTs(Vs...);
+  ArrayRef<const VerifierPlugin *> get() const {
+    return *Reader.HazardPtr.load(std::memory_order_relaxed);
   }
+};
 
-  template <typename... Ts> void WriteTs() {}
+} // anonymous namespace
 
-public:
-  /// A check failed, so printout out the condition and the message.
-  ///
-  /// This provides a nice place to put a breakpoint if you want to see why
-  /// something is not correct.
-  void CheckFailed(const Twine &Message) {
-    if (OS)
-      *OS << Message << '\n';
-    Broken = true;
+VerifierPlugin::VerifierPlugin() { PluginRegistry::get().addPlugin(this); }
+
+VerifierPlugin::~VerifierPlugin() { PluginRegistry::get().removePlugin(this); }
+
+void VerifierPlugin::verifyIntrinsicCall(CallBase &Call,
+                                         VerifierSupport &VS) const {}
+
+VerifierSupport::VerifierSupport(raw_ostream *OS, const Module &M)
+    : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()),
+      Context(M.getContext()) {}
+
+void VerifierSupport::Write(const Module *M) {
+  *OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+}
+
+void VerifierSupport::Write(const Value *V) {
+  if (V)
+    Write(*V);
+}
+
+void VerifierSupport::Write(const Value &V) {
+  if (isa<Instruction>(V)) {
+    V.print(*OS, MST);
+    *OS << '\n';
+  } else {
+    V.printAsOperand(*OS, true, MST);
+    *OS << '\n';
   }
+}
 
-  /// A check failed (with values to print).
-  ///
-  /// This calls the Message-only version so that the above is easier to set a
-  /// breakpoint on.
-  template <typename T1, typename... Ts>
-  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
-    CheckFailed(Message);
-    if (OS)
-      WriteTs(V1, Vs...);
-  }
-
-  /// A debug info check failed.
-  void DebugInfoCheckFailed(const Twine &Message) {
-    if (OS)
-      *OS << Message << '\n';
-    Broken |= TreatBrokenDebugInfoAsError;
-    BrokenDebugInfo = true;
-  }
-
-  /// A debug info check failed (with values to print).
-  template <typename T1, typename... Ts>
-  void DebugInfoCheckFailed(const Twine &Message, const T1 &V1,
-                            const Ts &... Vs) {
-    DebugInfoCheckFailed(Message);
-    if (OS)
-      WriteTs(V1, Vs...);
+void VerifierSupport::Write(const DbgRecord *DR) {
+  if (DR) {
+    DR->print(*OS, MST, false);
+    *OS << '\n';
   }
-};
+}
+
+void VerifierSupport::Write(DbgVariableRecord::LocationType Type) {
+  switch (Type) {
+  case DbgVariableRecord::LocationType::Value:
+    *OS << "value";
+    break;
+  case DbgVariableRecord::LocationType::Declare:
+    *OS << "declare";
+    break;
+  case DbgVariableRecord::LocationType::Assign:
+    *OS << "assign";
+    break;
+  case DbgVariableRecord::LocationType::End:
+    *OS << "end";
+    break;
+  case DbgVariableRecord::LocationType::Any:
+    *OS << "any";
+    break;
+  };
+}
 
-} // namespace llvm
+void VerifierSupport::Write(const Metadata *MD) {
+  if (!MD)
+    return;
+  MD->print(*OS, MST, &M);
+  *OS << '\n';
+}
+
+void VerifierSupport::Write(const NamedMDNode *NMD) {
+  if (!NMD)
+    return;
+  NMD->print(*OS, MST);
+  *OS << '\n';
+}
+
+void VerifierSupport::Write(Type *T) {
+  if (!T)
+    return;
+  *OS << ' ' << *T;
+}
+
+void VerifierSupport::Write(const Comdat *C) {
+  if (!C)
+    return;
+  *OS << *C;
+}
+
+void VerifierSupport::Write(const APInt *AI) {
+  if (!AI)
+    return;
+  *OS << *AI << '\n';
+}
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+void VerifierSupport::Write(const Attribute *A) {
+  if (!A)
+    return;
+  *OS << A->getAsString() << '\n';
+}
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+void VerifierSupport::Write(const AttributeSet *AS) {
+  if (!AS)
+    return;
+  *OS << AS->getAsString() << '\n';
+}
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+void VerifierSupport::Write(const AttributeList *AL) {
+  if (!AL)
+    return;
+  AL->print(*OS);
+}
+
+void VerifierSupport::CheckFailed(const Twine &Message) {
+  if (OS)
+    *OS << Message << '\n';
+  Broken = true;
+}
+
+/// A debug info check failed.
+void VerifierSupport::DebugInfoCheckFailed(const Twine &Message) {
+  if (OS)
+    *OS << Message << '\n';
+  Broken |= TreatBrokenDebugInfoAsError;
+  BrokenDebugInfo = true;
+}
 
 namespace {
 
 class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   friend class InstVisitor<Verifier>;
+
+  PluginRegistryReader PluginsReader;
+
   DominatorTree DT;
 
   /// When verifying a basic block, keep track of all of the
@@ -5660,8 +5775,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   }
 
   switch (ID) {
-  default:
+  default: {
+    PluginRegistryLock Lock(PluginsReader);
+    for (const VerifierPlugin *P : Lock.get())
+      P->verifyIntrinsicCall(Call, *this);
     break;
+  }
   case Intrinsic::assume: {
     for (auto &Elem : Call.bundle_op_infos()) {
       unsigned ArgCount = Elem.End - Elem.Begin;
@@ -6549,37 +6668,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     break;
   }
   case Intrinsic::preserve_array_access_index:
-  case Intrinsic::preserve_struct_access_index:
-  case Intrinsic::aarch64_ldaxr:
-  case Intrinsic::aarch64_ldxr:
-  case Intrinsic::arm_ldaex:
-  case Intrinsic::arm_ldrex: {
+  case Intrinsic::preserve_struct_access_index: {
     Type *ElemTy = Call.getParamElementType(0);
     Check(ElemTy, "Intrinsic requires elementtype attribute on first argument.",
           &Call);
     break;
   }
-  case Intrinsic::aarch64_stlxr:
-  case Intrinsic::aarch64_stxr:
-  case Intrinsic::arm_stlex:
-  case Intrinsic::arm_strex: {
-    Type *ElemTy = Call.getAttributes().getParamElementType(1);
-    Check(ElemTy,
-          "Intrinsic requires elementtype attribute on second argument.",
-          &Call);
-    break;
-  }
-  case Intrinsic::aarch64_prefetch: {
-    Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
-          "write argument to llvm.aarch64.prefetch must be 0 or 1", Call);
-    Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
-          "target argument to llvm.aarch64.prefetch must be 0-3", Call);
-    Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
-          "stream argument to llvm.aarch64.prefetch must be 0 or 1", Call);
-    Check(cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue() < 2,
-          "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
-    break;
-  }
   case Intrinsic::callbr_landingpad: {
     const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
     Check(CBR, "intrinstic requires callbr operand", &Call);
@@ -6606,232 +6700,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           &Call);
     break;
   }
-  case Intrinsic::amdgcn_cs_chain: {
-    auto CallerCC = Call.getCaller()->getCallingConv();
-    switch (CallerCC) {
-    case CallingConv::AMDGPU_CS:
-    case CallingConv::AMDGPU_CS_Chain:
-    case CallingConv::AMDGPU_CS_ChainPreserve:
-      break;
-    default:
-      CheckFailed("Intrinsic can only be used from functions with the "
-                  "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
-                  "calling conventions",
-                  &Call);
-      break;
-    }
-
-    Check(Call.paramHasAttr(2, Attribute::InReg),
-          "SGPR arguments must have the `inreg` attribute", &Call);
-    Check(!Call.paramHasAttr(3, Attribute::InReg),
-          "VGPR arguments must not have the `inreg` attribute", &Call);
-
-    auto *Next = Call.getNextNode();
-    bool IsAMDUnreachable = Next && isa<IntrinsicInst>(Next) &&
-                            cast<IntrinsicInst>(Next)->getIntrinsicID() ==
-                                Intrinsic::amdgcn_unreachable;
-    Check(Next && (isa<UnreachableInst>(Next) || IsAMDUnreachable),
-          "llvm.amdgcn.cs.chain must be followed by unreachable", &Call);
-    break;
-  }
-  case Intrinsic::amdgcn_init_exec_from_input: {
-    const Argument *Arg = dyn_cast<Argument>(Call.getOperand(0));
-    Check(Arg && Arg->hasInRegAttr(),
-          "only inreg arguments to the parent function are valid as inputs to "
-          "this intrinsic",
-          &Call);
-    break;
-  }
-  case Intrinsic::amdgcn_set_inactive_chain_arg: {
-    auto CallerCC = Call.getCaller()->getCallingConv();
-    switch (CallerCC) {
-    case CallingConv::AMDGPU_CS_Chain:
-    case CallingConv::AMDGPU_CS_ChainPreserve:
-      break;
-    default:
-      CheckFailed("Intrinsic can only be used from functions with the "
-                  "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
-                  "calling conventions",
-                  &Call);
-      break;
-    }
-
-    unsigned InactiveIdx = 1;
-    Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
-          "Value for inactive lanes must not have the `inreg` attribute",
-          &Call);
-    Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
-          "Value for inactive lanes must be a function argument", &Call);
-    Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
-          "Value for inactive lanes must be a VGPR function argument", &Call);
-    break;
-  }
-  case Intrinsic::amdgcn_call_whole_wave: {
-    auto F = dyn_cast<Function>(Call.getArgOperand(0));
-    Check(F, "Indirect whole wave calls are not allowed", &Call);
-
-    CallingConv::ID CC = F->getCallingConv();
-    Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
-          "Callee must have the amdgpu_gfx_whole_wave calling convention",
-          &Call);
-
-    Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
-
-    Check(Call.arg_size() == F->arg_size(),
-          "Call argument count must match callee argument count", &Call);
-
-    // The first argument of the call is the callee, and the first argument of
-    // the callee is the active mask. The rest of the arguments must match.
-    Check(F->arg_begin()->getType()->isIntegerTy(1),
-          "Callee must have i1 as its first argument", &Call);
-    for (auto [CallArg, FuncArg] :
-         drop_begin(zip_equal(Call.args(), F->args()))) {
-      Check(CallArg->getType() == FuncArg.getType(),
-            "Argument types must match", &Call);
-
-      // Check that inreg attributes match between call site and function
-      Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
-                FuncArg.hasInRegAttr(),
-            "Argument inreg attributes must match", &Call);
-    }
-    break;
-  }
-  case Intrinsic::amdgcn_s_prefetch_data: {
-    Check(
-        AMDGPU::isFlatGlobalAddrSpace(
-            Call.getArgOperand(0)->getType()->getPointerAddressSpace()),
-        "llvm.amdgcn.s.prefetch.data only supports global or constant memory");
-    break;
-  }
-  case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
-  case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
-    Value *Src0 = Call.getArgOperand(0);
-    Value *Src1 = Call.getArgOperand(1);
-
-    uint64_t CBSZ = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
-    uint64_t BLGP = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
-    Check(CBSZ <= 4, "invalid value for cbsz format", Call,
-          Call.getArgOperand(3));
-    Check(BLGP <= 4, "invalid value for blgp format", Call,
-          Call.getArgOperand(4));
-
-    // AMDGPU::MFMAScaleFormats values
-    auto getFormatNumRegs = [](unsigned FormatVal) {
-      switch (FormatVal) {
-      case 0:
-      case 1:
-        return 8u;
-      case 2:
-      case 3:
-        return 6u;
-      case 4:
-        return 4u;
-      default:
-        llvm_unreachable("invalid format value");
-      }
-    };
-
-    auto isValidSrcASrcBVector = [](FixedVectorType *Ty) {
-      if (!Ty || !Ty->getElementType()->isIntegerTy(32))
-        return false;
-      unsigned NumElts = Ty->getNumElements();
-      return NumElts == 4 || NumElts == 6 || NumElts == 8;
-    };
-
-    auto *Src0Ty = dyn_cast<FixedVectorType>(Src0->getType());
-    auto *Src1Ty = dyn_cast<FixedVectorType>(Src1->getType());
-    Check(isValidSrcASrcBVector(Src0Ty),
-          "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0);
-    Check(isValidSrcASrcBVector(Src1Ty),
-          "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1);
-
-    // Permit excess registers for the format.
-    Check(Src0Ty->getNumElements() >= getFormatNumRegs(CBSZ),
-          "invalid vector type for format", &Call, Src0, Call.getArgOperand(3));
-    Check(Src1Ty->getNumElements() >= getFormatNumRegs(BLGP),
-          "invalid vector type for format", &Call, Src1, Call.getArgOperand(5));
-    break;
-  }
-  case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
-  case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
-  case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
-    Value *Src0 = Call.getArgOperand(1);
-    Value *Src1 = Call.getArgOperand(3);
-
-    unsigned FmtA = cast<ConstantInt>(Call.getArgOperand(0))->getZExtValue();
-    unsigned FmtB = cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue();
-    Check(FmtA <= 4, "invalid value for matrix format", Call,
-          Call.getArgOperand(0));
-    Check(FmtB <= 4, "invalid value for matrix format", Call,
-          Call.getArgOperand(2));
-
-    // AMDGPU::MatrixFMT values
-    auto getFormatNumRegs = [](unsigned FormatVal) {
-      switch (FormatVal) {
-      case 0:
-      case 1:
-        return 16u;
-      case 2:
-      case 3:
-        return 12u;
-      case 4:
-        return 8u;
-      default:
-        llvm_unreachable("invalid format value");
-      }
-    };
-
-    auto isValidSrcASrcBVector = [](FixedVectorType *Ty) {
-      if (!Ty || !Ty->getElementType()->isIntegerTy(32))
-        return false;
-      unsigned NumElts = Ty->getNumElements();
-      return NumElts == 16 || NumElts == 12 || NumElts == 8;
-    };
-
-    auto *Src0Ty = dyn_cast<FixedVectorType>(Src0->getType());
-    auto *Src1Ty = dyn_cast<FixedVectorType>(Src1->getType());
-    Check(isValidSrcASrcBVector(Src0Ty),
-          "operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0);
-    Check(isValidSrcASrcBVector(Src1Ty),
-          "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1);
-
-    // Permit excess registers for the format.
-    Check(Src0Ty->getNumElements() >= getFormatNumRegs(FmtA),
-          "invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
-    Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
-          "invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
-    break;
-  }
-  case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
-  case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
-  case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
-  case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
-  case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
-  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
-    // Check we only use this intrinsic on the FLAT or GLOBAL address spaces.
-    Value *PtrArg = Call.getArgOperand(0);
-    const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
-    Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS,
-          "cooperative atomic intrinsics require a generic or global pointer",
-          &Call, PtrArg);
-
-    // Last argument must be a MD string
-    auto *Op = cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
-    MDNode *MD = cast<MDNode>(Op->getMetadata());
-    Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
-          "cooperative atomic intrinsics require that the last argument is a "
-          "metadata string",
-          &Call, Op);
-    break;
-  }
-  case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
-  case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
-    Value *V = Call.getArgOperand(0);
-    unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
-    Check(RegCount % 8 == 0,
-          "reg_count argument to nvvm.setmaxnreg must be in multiples of 8");
-    break;
-  }
   case Intrinsic::experimental_convergence_entry:
   case Intrinsic::experimental_convergence_anchor:
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 8d0ff41fc8c08..176159dd40766 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -115,6 +115,8 @@ void initializeSMEPeepholeOptPass(PassRegistry &);
 void initializeMachineSMEABIPass(PassRegistry &);
 void initializeSVEIntrinsicOptsPass(PassRegistry &);
 void initializeAArch64Arm64ECCallLoweringPass(PassRegistry &);
+
+void initializeAArch64Verifier();
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index dde1d88403bfe..122a4ebb39d84 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -239,6 +239,7 @@ LLVMInitializeAArch64Target() {
   RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
   auto &PR = *PassRegistry::getPassRegistry();
   initializeGlobalISel(PR);
+  initializeAArch64Verifier();
   initializeAArch64A53Fix835769Pass(PR);
   initializeAArch64A57FPLoadBalancingPass(PR);
   initializeAArch64AdvSIMDScalarPass(PR);
diff --git a/llvm/lib/Target/AArch64/AArch64Verifier.cpp b/llvm/lib/Target/AArch64/AArch64Verifier.cpp
new file mode 100644
index 0000000000000..61e3618d11cc7
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64Verifier.cpp
@@ -0,0 +1,70 @@
+//===- AArch64Verifier.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// IR verifier plugin for AArch64 intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/Verifier.h"
+
+using namespace llvm;
+
+namespace {
+
+#define Check(C, ...)                                                          \
+  do {                                                                         \
+    if (!(C)) {                                                                \
+      VS.CheckFailed(__VA_ARGS__);                                             \
+      return;                                                                  \
+    }                                                                          \
+  } while (false)
+
+class AArch64Verifier : public VerifierPlugin {
+public:
+  void verifyIntrinsicCall(CallBase &Call, VerifierSupport &VS) const override {
+    switch (Call.getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::aarch64_ldaxr:
+    case Intrinsic::aarch64_ldxr: {
+      Type *ElemTy = Call.getParamElementType(0);
+      Check(ElemTy,
+            "Intrinsic requires elementtype attribute on first argument.",
+            &Call);
+      break;
+    }
+    case Intrinsic::aarch64_stlxr:
+    case Intrinsic::aarch64_stxr: {
+      Type *ElemTy = Call.getParamElementType(1);
+      Check(ElemTy,
+            "Intrinsic requires elementtype attribute on second argument.",
+            &Call);
+      break;
+    }
+    case Intrinsic::aarch64_prefetch: {
+      Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+            "write argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+      Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+            "target argument to llvm.aarch64.prefetch must be 0-3", Call);
+      Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
+            "stream argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+      Check(cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue() < 2,
+            "isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
+      break;
+    }
+    }
+  }
+};
+
+} // anonymous namespace
+
+void llvm::initializeAArch64Verifier() { static AArch64Verifier Verifier; }
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index a8185358d6dfc..e96cc88b841d2 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -87,6 +87,7 @@ add_llvm_target(AArch64CodeGen
   AArch64TargetMachine.cpp
   AArch64TargetObjectFile.cpp
   AArch64TargetTransformInfo.cpp
+  AArch64Verifier.cpp
   SMEABIPass.cpp
   SMEPeepholeOpt.cpp
   SVEIntrinsicOpts.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0f2c33585884f..8aa4231a959e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -22,6 +22,8 @@ class AMDGPUTargetMachine;
 class GCNTargetMachine;
 class TargetMachine;
 
+void initializeAMDGPUVerifier();
+
 // GlobalISel passes
 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 92a587b5771b6..3a4a7a90911ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -531,6 +531,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
   RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
 
+  initializeAMDGPUVerifier();
+
   PassRegistry *PR = PassRegistry::getPassRegistry();
   initializeR600ClauseMergePassPass(*PR);
   initializeR600ControlFlowFinalizerPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUVerifier.cpp b/llvm/lib/Target/AMDGPU/AMDGPUVerifier.cpp
new file mode 100644
index 0000000000000..5d555abd30a70
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUVerifier.cpp
@@ -0,0 +1,266 @@
+//===- AMDGPUVerifier.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// IR verifier plugin for AMDGPU intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Verifier.h"
+
+using namespace llvm;
+
+namespace {
+
+#define Check(C, ...)                                                          \
+  do {                                                                         \
+    if (!(C)) {                                                                \
+      VS.CheckFailed(__VA_ARGS__);                                             \
+      return;                                                                  \
+    }                                                                          \
+  } while (false)
+
+class AMDGPUVerifier : public VerifierPlugin {
+public:
+  void verifyIntrinsicCall(CallBase &Call, VerifierSupport &VS) const override {
+    switch (Call.getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::amdgcn_cs_chain: {
+      auto CallerCC = Call.getCaller()->getCallingConv();
+      switch (CallerCC) {
+      case CallingConv::AMDGPU_CS:
+      case CallingConv::AMDGPU_CS_Chain:
+      case CallingConv::AMDGPU_CS_ChainPreserve:
+        break;
+      default:
+        VS.CheckFailed("Intrinsic can only be used from functions with the "
+                       "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                       "calling conventions",
+                       &Call);
+        break;
+      }
+
+      Check(Call.paramHasAttr(2, Attribute::InReg),
+            "SGPR arguments must have the `inreg` attribute", &Call);
+      Check(!Call.paramHasAttr(3, Attribute::InReg),
+            "VGPR arguments must not have the `inreg` attribute", &Call);
+
+      auto *Next = Call.getNextNode();
+      bool IsAMDUnreachable = Next && isa<IntrinsicInst>(Next) &&
+                              cast<IntrinsicInst>(Next)->getIntrinsicID() ==
+                                  Intrinsic::amdgcn_unreachable;
+      Check(Next && (isa<UnreachableInst>(Next) || IsAMDUnreachable),
+            "llvm.amdgcn.cs.chain must be followed by unreachable", &Call);
+      break;
+    }
+    case Intrinsic::amdgcn_init_exec_from_input: {
+      const Argument *Arg = dyn_cast<Argument>(Call.getOperand(0));
+      Check(
+          Arg && Arg->hasInRegAttr(),
+          "only inreg arguments to the parent function are valid as inputs to "
+          "this intrinsic",
+          &Call);
+      break;
+    }
+    case Intrinsic::amdgcn_set_inactive_chain_arg: {
+      auto CallerCC = Call.getCaller()->getCallingConv();
+      switch (CallerCC) {
+      case CallingConv::AMDGPU_CS_Chain:
+      case CallingConv::AMDGPU_CS_ChainPreserve:
+        break;
+      default:
+        VS.CheckFailed("Intrinsic can only be used from functions with the "
+                       "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                       "calling conventions",
+                       &Call);
+        break;
+      }
+
+      unsigned InactiveIdx = 1;
+      Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
+            "Value for inactive lanes must not have the `inreg` attribute",
+            &Call);
+      Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
+            "Value for inactive lanes must be a function argument", &Call);
+      Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
+            "Value for inactive lanes must be a VGPR function argument", &Call);
+      break;
+    }
+    case Intrinsic::amdgcn_call_whole_wave: {
+      auto F = dyn_cast<Function>(Call.getArgOperand(0));
+      Check(F, "Indirect whole wave calls are not allowed", &Call);
+
+      CallingConv::ID CC = F->getCallingConv();
+      Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
+            "Callee must have the amdgpu_gfx_whole_wave calling convention",
+            &Call);
+
+      Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
+
+      Check(Call.arg_size() == F->arg_size(),
+            "Call argument count must match callee argument count", &Call);
+
+      // The first argument of the call is the callee, and the first argument of
+      // the callee is the active mask. The rest of the arguments must match.
+      Check(F->arg_begin()->getType()->isIntegerTy(1),
+            "Callee must have i1 as its first argument", &Call);
+      for (auto [CallArg, FuncArg] :
+           drop_begin(zip_equal(Call.args(), F->args()))) {
+        Check(CallArg->getType() == FuncArg.getType(),
+              "Argument types must match", &Call);
+
+        // Check that inreg attributes match between call site and function
+        Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
+                  FuncArg.hasInRegAttr(),
+              "Argument inreg attributes must match", &Call);
+      }
+      break;
+    }
+    case Intrinsic::amdgcn_s_prefetch_data: {
+      Check(AMDGPU::isFlatGlobalAddrSpace(
+                Call.getArgOperand(0)->getType()->getPointerAddressSpace()),
+            "llvm.amdgcn.s.prefetch.data only supports global or constant "
+            "memory");
+      break;
+    }
+    case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
+    case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
+      Value *Src0 = Call.getArgOperand(0);
+      Value *Src1 = Call.getArgOperand(1);
+
+      uint64_t CBSZ = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
+      uint64_t BLGP = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
+      Check(CBSZ <= 4, "invalid value for cbsz format", Call,
+            Call.getArgOperand(3));
+      Check(BLGP <= 4, "invalid value for blgp format", Call,
+            Call.getArgOperand(4));
+
+      // AMDGPU::MFMAScaleFormats values
+      auto getFormatNumRegs = [](unsigned FormatVal) {
+        switch (FormatVal) {
+        case 0:
+        case 1:
+          return 8u;
+        case 2:
+        case 3:
+          return 6u;
+        case 4:
+          return 4u;
+        default:
+          llvm_unreachable("invalid format value");
+        }
+      };
+
+      auto isValidSrcASrcBVector = [](FixedVectorType *Ty) {
+        if (!Ty || !Ty->getElementType()->isIntegerTy(32))
+          return false;
+        unsigned NumElts = Ty->getNumElements();
+        return NumElts == 4 || NumElts == 6 || NumElts == 8;
+      };
+
+      auto *Src0Ty = dyn_cast<FixedVectorType>(Src0->getType());
+      auto *Src1Ty = dyn_cast<FixedVectorType>(Src1->getType());
+      Check(isValidSrcASrcBVector(Src0Ty),
+            "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0);
+      Check(isValidSrcASrcBVector(Src1Ty),
+            "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1);
+
+      // Permit excess registers for the format.
+      Check(Src0Ty->getNumElements() >= getFormatNumRegs(CBSZ),
+            "invalid vector type for format", &Call, Src0,
+            Call.getArgOperand(3));
+      Check(Src1Ty->getNumElements() >= getFormatNumRegs(BLGP),
+            "invalid vector type for format", &Call, Src1,
+            Call.getArgOperand(5));
+      break;
+    }
+    case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
+    case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
+    case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
+      Value *Src0 = Call.getArgOperand(1);
+      Value *Src1 = Call.getArgOperand(3);
+
+      unsigned FmtA = cast<ConstantInt>(Call.getArgOperand(0))->getZExtValue();
+      unsigned FmtB = cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue();
+      Check(FmtA <= 4, "invalid value for matrix format", Call,
+            Call.getArgOperand(0));
+      Check(FmtB <= 4, "invalid value for matrix format", Call,
+            Call.getArgOperand(2));
+
+      // AMDGPU::MatrixFMT values
+      auto getFormatNumRegs = [](unsigned FormatVal) {
+        switch (FormatVal) {
+        case 0:
+        case 1:
+          return 16u;
+        case 2:
+        case 3:
+          return 12u;
+        case 4:
+          return 8u;
+        default:
+          llvm_unreachable("invalid format value");
+        }
+      };
+
+      auto isValidSrcASrcBVector = [](FixedVectorType *Ty) {
+        if (!Ty || !Ty->getElementType()->isIntegerTy(32))
+          return false;
+        unsigned NumElts = Ty->getNumElements();
+        return NumElts == 16 || NumElts == 12 || NumElts == 8;
+      };
+
+      auto *Src0Ty = dyn_cast<FixedVectorType>(Src0->getType());
+      auto *Src1Ty = dyn_cast<FixedVectorType>(Src1->getType());
+      Check(isValidSrcASrcBVector(Src0Ty),
+            "operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0);
+      Check(isValidSrcASrcBVector(Src1Ty),
+            "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1);
+
+      // Permit excess registers for the format.
+      Check(Src0Ty->getNumElements() >= getFormatNumRegs(FmtA),
+            "invalid vector type for format", &Call, Src0,
+            Call.getArgOperand(0));
+      Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
+            "invalid vector type for format", &Call, Src1,
+            Call.getArgOperand(2));
+      break;
+    }
+    case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
+    case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
+    case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
+    case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
+    case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
+    case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: {
+      // Check we only use this intrinsic on the FLAT or GLOBAL address spaces.
+      Value *PtrArg = Call.getArgOperand(0);
+      const unsigned AS = PtrArg->getType()->getPointerAddressSpace();
+      Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS,
+            "cooperative atomic intrinsics require a generic or global pointer",
+            &Call, PtrArg);
+
+      // Last argument must be a MD string
+      auto *Op = cast<MetadataAsValue>(Call.getArgOperand(Call.arg_size() - 1));
+      MDNode *MD = cast<MDNode>(Op->getMetadata());
+      Check((MD->getNumOperands() == 1) && isa<MDString>(MD->getOperand(0)),
+            "cooperative atomic intrinsics require that the last argument is a "
+            "metadata string",
+            &Call, Op);
+      break;
+    }
+    }
+  }
+};
+
+} // anonymous namespace
+
+void llvm::initializeAMDGPUVerifier() { static AMDGPUVerifier TheVerifier; }
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index aae56eef73edd..2b04cdf917412 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -117,6 +117,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUTargetMachine.cpp
   AMDGPUTargetObjectFile.cpp
   AMDGPUTargetTransformInfo.cpp
+  AMDGPUVerifier.cpp
   AMDGPUWaitSGPRHazards.cpp
   AMDGPUUnifyDivergentExitNodes.cpp
   R600MachineCFGStructurizer.cpp
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 3847f4e966afe..e34ab59683faa 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -81,6 +81,8 @@ void initializeMVEVPTBlockPass(PassRegistry &);
 void initializeThumb2ITBlockPass(PassRegistry &);
 void initializeThumb2SizeReducePass(PassRegistry &);
 
+void initializeARMVerifier();
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 346776e0c4b25..3e6b9688b933d 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -90,6 +90,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
   RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
   RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget());
 
+  initializeARMVerifier();
+
   PassRegistry &Registry = *PassRegistry::getPassRegistry();
   initializeGlobalISel(Registry);
   initializeARMAsmPrinterPass(Registry);
diff --git a/llvm/lib/Target/ARM/ARMVerifier.cpp b/llvm/lib/Target/ARM/ARMVerifier.cpp
new file mode 100644
index 0000000000000..a4e116fb64aef
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMVerifier.cpp
@@ -0,0 +1,58 @@
+//===- ARMVerifier.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// IR verifier plugin for ARM intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/Verifier.h"
+
+using namespace llvm;
+
+namespace {
+
+#define Check(C, ...)                                                          \
+  do {                                                                         \
+    if (!(C)) {                                                                \
+      VS.CheckFailed(__VA_ARGS__);                                             \
+      return;                                                                  \
+    }                                                                          \
+  } while (false)
+
+class ARMVerifier : public VerifierPlugin {
+public:
+  void verifyIntrinsicCall(CallBase &Call, VerifierSupport &VS) const override {
+    switch (Call.getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::arm_ldaex:
+    case Intrinsic::arm_ldrex: {
+      Type *ElemTy = Call.getParamElementType(0);
+      Check(ElemTy,
+            "Intrinsic requires elementtype attribute on first argument.",
+            &Call);
+      break;
+    }
+    case Intrinsic::arm_stlex:
+    case Intrinsic::arm_strex: {
+      Type *ElemTy = Call.getParamElementType(1);
+      Check(ElemTy,
+            "Intrinsic requires elementtype attribute on second argument.",
+            &Call);
+      break;
+    }
+    }
+  }
+};
+
+} // anonymous namespace
+
+void llvm::initializeARMVerifier() { static ARMVerifier Verifier; }
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index fa778cad4af8e..60d28421486c7 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -57,6 +57,7 @@ add_llvm_target(ARMCodeGen
   ARMTargetMachine.cpp
   ARMTargetObjectFile.cpp
   ARMTargetTransformInfo.cpp
+  ARMVerifier.cpp
   MLxExpansionPass.cpp
   MVEGatherScatterLowering.cpp
   MVELaneInterleavingPass.cpp
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693f0d0b35edc..ce8c3fbbd0e7a 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -42,6 +42,7 @@ set(NVPTXCodeGen_sources
   NVPTXUtilities.cpp
   NVVMIntrRange.cpp
   NVVMReflect.cpp
+  NVVMVerifier.cpp
   )
 
 add_llvm_target(NVPTXCodeGen
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 77a0e03d4075a..cb8f9e15154be 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -56,6 +56,8 @@ MachineFunctionPass *createNVPTXPeephole();
 MachineFunctionPass *createNVPTXProxyRegErasurePass();
 MachineFunctionPass *createNVPTXForwardParamsPass();
 
+void initializeNVVMVerifier();
+
 void initializeNVVMReflectLegacyPassPass(PassRegistry &);
 void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a6837a482608c..789620d3e3b73 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -93,6 +93,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
   RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
   RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
 
+  initializeNVVMVerifier();
+
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   // FIXME: This pass is really intended to be invoked during IR optimization,
   // but it's very NVPTX-specific.
diff --git a/llvm/lib/Target/NVPTX/NVVMVerifier.cpp b/llvm/lib/Target/NVPTX/NVVMVerifier.cpp
new file mode 100644
index 0000000000000..7f8a471b76bfa
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVVMVerifier.cpp
@@ -0,0 +1,51 @@
+//===- NVVMVerifier.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// IR verifier plugin for NVVM intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/Verifier.h"
+
+using namespace llvm;
+
+namespace {
+
+#define Check(C, ...)                                                          \
+  do {                                                                         \
+    if (!(C)) {                                                                \
+      VS.CheckFailed(__VA_ARGS__);                                             \
+      return;                                                                  \
+    }                                                                          \
+  } while (false)
+
+class NVVMVerifier : public VerifierPlugin {
+public:
+  void verifyIntrinsicCall(CallBase &Call, VerifierSupport &VS) const override {
+    switch (Call.getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
+    case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
+      Value *V = Call.getArgOperand(0);
+      unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
+      Check(RegCount % 8 == 0,
+            "reg_count argument to nvvm.setmaxnreg must be in multiples of 8");
+      break;
+    }
+    }
+  }
+};
+
+} // anonymous namespace
+
+void llvm::initializeNVVMVerifier() { static NVVMVerifier Verifier; }
diff --git a/llvm/tools/llvm-as/CMakeLists.txt b/llvm/tools/llvm-as/CMakeLists.txt
index b21410fd23af7..7ba9f7ee9fa55 100644
--- a/llvm/tools/llvm-as/CMakeLists.txt
+++ b/llvm/tools/llvm-as/CMakeLists.txt
@@ -1,4 +1,6 @@
 set(LLVM_LINK_COMPONENTS
+  AllTargetsCodeGens
+  AllTargetsInfos
   AsmParser
   BitWriter
   Core
diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index 21648674b51f1..5f1fcbc981156 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include <memory>
 #include <optional>
@@ -114,6 +115,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
+  InitializeAllTargets(); // for verifier plugins
   cl::HideUnrelatedOptions(AsCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
   LLVMContext Context;



More information about the llvm-commits mailing list