[llvm] 9350860 - [AsmParser] Add support for reading incomplete IR (part 1) (#78421)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 19 07:08:20 PST 2024


Author: Nikita Popov
Date: 2024-01-19T16:08:16+01:00
New Revision: 9350860824a8badbbfe2ba81804e163543da2173

URL: https://github.com/llvm/llvm-project/commit/9350860824a8badbbfe2ba81804e163543da2173
DIFF: https://github.com/llvm/llvm-project/commit/9350860824a8badbbfe2ba81804e163543da2173.diff

LOG: [AsmParser] Add support for reading incomplete IR (part 1) (#78421)

Add an `-allow-incomplete-ir` flag to the IR parser, which allows
reading IR with missing declarations. This is intended to produce a
best-effort interpretation of the IR, along the same lines of what we
would manually do when taking, for example, a function from
`-print-after-all` output and fixing it up to be valid IR.

This patch only supports dropping references to undeclared metadata,
either by dropping metadata attachments from instructions/functions, or
by dropping calls to certain intrinsics (like debug intrinsics). I will
implement support for inserting missing function/global declarations in
a followup patch.

We don't have real use lists for metadata, so the approach here is to
iterate over the whole IR and identify metadata that needs to be
dropped. This does not support all possible cases, but should handle
anything that's relevant for the function-only IR use case.

Added: 
    llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
    llvm/test/Assembler/incomplete-ir-metadata.ll

Modified: 
    llvm/include/llvm/AsmParser/LLParser.h
    llvm/include/llvm/IR/GlobalObject.h
    llvm/include/llvm/IR/Instruction.h
    llvm/include/llvm/IR/Metadata.h
    llvm/include/llvm/IR/Value.h
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/IR/Metadata.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index b0d02eac8e672d..cf358c384f5203 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -331,6 +331,7 @@ namespace llvm {
 
     // Top-Level Entities
     bool parseTopLevelEntities();
+    void dropUnknownMetadataReferences();
     bool validateEndOfModule(bool UpgradeDebugInfo);
     bool validateEndOfIndex();
     bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);

diff  --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index ae8e616824448b..b6a974d8bb9f08 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -133,6 +133,7 @@ class GlobalObject : public GlobalValue {
   using Value::addMetadata;
   using Value::clearMetadata;
   using Value::eraseMetadata;
+  using Value::eraseMetadataIf;
   using Value::getAllMetadata;
   using Value::getMetadata;
   using Value::hasMetadata;

diff  --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 0211b5076131ce..fcd2ba838e7fd5 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -384,6 +384,9 @@ class Instruction : public User,
   void copyMetadata(const Instruction &SrcInst,
                     ArrayRef<unsigned> WL = ArrayRef<unsigned>());
 
+  /// Erase all metadata that matches the predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// If the instruction has "branch_weights" MD_prof metadata and the MDNode
   /// has three operands (including name string), swap the order of the
   /// metadata.

diff  --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index befb6975ca18d4..db1f44fea3b459 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -417,6 +417,8 @@ class ReplaceableMetadataImpl {
   /// is resolved.
   void resolveAllUses(bool ResolveUsers = true);
 
+  unsigned getNumUses() const { return UseMap.size(); }
+
 private:
   void addRef(void *Ref, OwnerTy Owner);
   void dropRef(void *Ref);
@@ -1243,6 +1245,11 @@ class MDNode : public Metadata {
   bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
   bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
 
+  unsigned getNumTemporaryUses() const {
+    assert(isTemporary() && "Only for temporaries");
+    return Context.getReplaceableUses()->getNumUses();
+  }
+
   /// RAUW a temporary.
   ///
   /// \pre \a isTemporary() must be \c true.

diff  --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 61f9d34eef3567..945081b77e9536 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -618,6 +618,9 @@ class Value {
   /// \returns true if any metadata was removed.
   bool eraseMetadata(unsigned KindID);
 
+  /// Erase all metadata attachments matching the given predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// Erase all metadata attached to this Value.
   void clearMetadata();
 

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index ea7f9a00d719fd..d6c5993797de11 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -13,8 +13,8 @@
 #include "llvm/AsmParser/LLParser.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/AsmParser/LLToken.h"
 #include "llvm/AsmParser/SlotMapping.h"
@@ -32,7 +32,9 @@
 #include "llvm/IR/GlobalIFunc.h"
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -54,6 +56,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> AllowIncompleteIR(
+    "allow-incomplete-ir", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Allow incomplete IR on a best effort basis (references to unknown "
+        "metadata will be dropped)"));
+
 static std::string getTypeString(Type *T) {
   std::string Result;
   raw_string_ostream Tmp(Result);
@@ -123,6 +131,55 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
         std::make_pair(I.first, std::make_pair(I.second, LocTy())));
 }
 
+static void dropIntrinsicWithUnknownMetadataArgument(IntrinsicInst *II) {
+  // White-list intrinsics that are safe to drop.
+  if (!isa<DbgInfoIntrinsic>(II) &&
+      II->getIntrinsicID() != Intrinsic::experimental_noalias_scope_decl)
+    return;
+
+  SmallVector<MetadataAsValue *> MVs;
+  for (Value *V : II->args())
+    if (auto *MV = dyn_cast<MetadataAsValue>(V))
+      if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
+        if (MD->isTemporary())
+          MVs.push_back(MV);
+
+  if (!MVs.empty()) {
+    assert(II->use_empty() && "Cannot have uses");
+    II->eraseFromParent();
+
+    // Also remove no longer used MetadataAsValue wrappers.
+    for (MetadataAsValue *MV : MVs)
+      if (MV->use_empty())
+        delete MV;
+  }
+}
+
+void LLParser::dropUnknownMetadataReferences() {
+  auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
+  for (Function &F : *M) {
+    F.eraseMetadataIf(Pred);
+    for (Instruction &I : make_early_inc_range(instructions(F))) {
+      I.eraseMetadataIf(Pred);
+
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        dropIntrinsicWithUnknownMetadataArgument(II);
+    }
+  }
+
+  for (GlobalVariable &GV : M->globals())
+    GV.eraseMetadataIf(Pred);
+
+  for (const auto &[ID, Info] : make_early_inc_range(ForwardRefMDNodes)) {
+    // Check whether there is only a single use left, which would be in our
+    // own NumberedMetadata.
+    if (Info.first->getNumTemporaryUses() == 1) {
+      NumberedMetadata.erase(ID);
+      ForwardRefMDNodes.erase(ID);
+    }
+  }
+}
+
 /// validateEndOfModule - Do final validity and basic correctness checks at the
 /// end of the module.
 bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
@@ -284,6 +341,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                  "use of undefined value '@" +
                      Twine(ForwardRefValIDs.begin()->first) + "'");
 
+  if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
+    dropUnknownMetadataReferences();
+
   if (!ForwardRefMDNodes.empty())
     return error(ForwardRefMDNodes.begin()->second.second,
                  "use of undefined metadata '!" +
@@ -297,10 +357,14 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
 
   for (auto *Inst : InstsWithTBAATag) {
     MDNode *MD = Inst->getMetadata(LLVMContext::MD_tbaa);
-    assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
-    auto *UpgradedMD = UpgradeTBAANode(*MD);
-    if (MD != UpgradedMD)
-      Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    // With incomplete IR, the tbaa metadata may have been dropped.
+    if (!AllowIncompleteIR)
+      assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+    if (MD) {
+      auto *UpgradedMD = UpgradeTBAANode(*MD);
+      if (MD != UpgradedMD)
+        Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    }
   }
 
   // Look for intrinsic functions and CallInst that need to be upgraded.  We use

diff  --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index eff4fd371a73ff..37017a222d4853 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1533,6 +1533,21 @@ bool Value::eraseMetadata(unsigned KindID) {
   return Changed;
 }
 
+void Value::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (!HasMetadata)
+    return;
+
+  auto &MetadataStore = getContext().pImpl->ValueMetadata;
+  MDAttachments &Info = MetadataStore.find(this)->second;
+  assert(!Info.empty() && "bit out of sync with hash table");
+  Info.remove_if([Pred](const MDAttachments::Attachment &I) {
+    return Pred(I.MDKind, I.Node);
+  });
+
+  if (Info.empty())
+    clearMetadata();
+}
+
 void Value::clearMetadata() {
   if (!HasMetadata)
     return;
@@ -1556,6 +1571,13 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
   return Value::getMetadata(KindID);
 }
 
+void Instruction::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (DbgLoc && Pred(LLVMContext::MD_dbg, DbgLoc.getAsMDNode()))
+    DbgLoc = {};
+
+  Value::eraseMetadataIf(Pred);
+}
+
 void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   if (!Value::hasMetadata())
     return; // Nothing to remove!
@@ -1566,17 +1588,9 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   // A DIAssignID attachment is debug metadata, don't drop it.
   KnownSet.insert(LLVMContext::MD_DIAssignID);
 
-  auto &MetadataStore = getContext().pImpl->ValueMetadata;
-  MDAttachments &Info = MetadataStore.find(this)->second;
-  assert(!Info.empty() && "bit out of sync with hash table");
-  Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
-    return !KnownSet.count(I.MDKind);
+  Value::eraseMetadataIf([&KnownSet](unsigned MDKind, MDNode *Node) {
+    return !KnownSet.count(MDKind);
   });
-
-  if (Info.empty()) {
-    // Drop our entry at the store.
-    clearMetadata();
-  }
 }
 
 void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {

diff  --git a/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
new file mode 100644
index 00000000000000..e320472921ed9c
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -allow-incomplete-ir < %s 2>&1 | FileCheck %s
+
+; CHECK: error: use of undefined metadata '!1'
+define void @test(ptr %p) {
+  %v = load i8, ptr %p, !noalias !0
+  ret void
+}
+
+!0 = !{!1}

diff  --git a/llvm/test/Assembler/incomplete-ir-metadata.ll b/llvm/test/Assembler/incomplete-ir-metadata.ll
new file mode 100644
index 00000000000000..fdf7b4ee0ebd23
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -allow-incomplete-ir < %s | FileCheck %s
+
+ at g = global i8 0, !exclude !4
+
+define void @test(ptr %p) !dbg !3 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V2:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V3:%.*]] = load i8, ptr [[P]], align 1, !noalias [[META0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i8, ptr %p, !noalias !0
+  %v2 = load i8, ptr %p, !tbaa !1
+  %v3 = load i8, ptr %p, !dbg !2, !noalias !100
+  call void @llvm.experimental.noalias.scope.decl(metadata !5)
+  call void @llvm.dbg.value(metadata i32 0, metadata !7, metadata !8)
+  call void @llvm.experimental.noalias.scope.decl(metadata !100)
+  ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!100 = !{!101}
+!101 = !{!101, !102}
+!102 = !{!102}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]]}
+;.


        


More information about the llvm-commits mailing list