[llvm] [AsmParser] Add support for reading incomplete IR (part 1) (PR #78421)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 19 06:28:20 PST 2024


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/78421

>From 8f71d65cbe289f6dad269aa956f07b520fb3ea41 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 16 Jan 2024 17:17:08 +0100
Subject: [PATCH 1/3] [AsmParser] Add support for reading incomplete IR

Add an `-allow-incomplete-ir` flag to the IR parser, which allows
reading IR with missing declarations. This is intended to produce
a best-effort interpretation of the IR, along the same lines of
what we would manually do when taking, for example, a function
from `-print-after-all` output and fixing it up to be valid IR.

This patch only supports dropping references to undeclared metadata,
either by dropping metadata attachments from instructions/functions,
or by dropping calls to certain intrinsics (like debug intrinsics).
I will implement support for inserting missing function/global
declarations in a followup patch.

We don't have real use lists for metadata, so the approach here is
to iterate over the whole IR and identify metadata that needs to
be dropped. This does not support all possible cases, but should
handle anything that's relevant for the function-only IR use case.
---
 llvm/include/llvm/AsmParser/LLParser.h        |  1 +
 llvm/include/llvm/IR/GlobalObject.h           |  1 +
 llvm/include/llvm/IR/Instruction.h            |  3 +
 llvm/include/llvm/IR/Metadata.h               |  7 ++
 llvm/include/llvm/IR/Value.h                  |  3 +
 llvm/lib/AsmParser/LLParser.cpp               | 75 +++++++++++++++++--
 llvm/lib/IR/Metadata.cpp                      | 34 ++++++---
 .../incomplete-ir-metadata-unsupported.ll     |  9 +++
 llvm/test/Assembler/incomplete-ir-metadata.ll | 34 +++++++++
 9 files changed, 152 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
 create mode 100644 llvm/test/Assembler/incomplete-ir-metadata.ll

diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index b0d02eac8e672d..cf358c384f5203 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -331,6 +331,7 @@ namespace llvm {
 
     // Top-Level Entities
     bool parseTopLevelEntities();
+    void dropUnknownMetadataReferences();
     bool validateEndOfModule(bool UpgradeDebugInfo);
     bool validateEndOfIndex();
     bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index ae8e616824448b..b6a974d8bb9f08 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -133,6 +133,7 @@ class GlobalObject : public GlobalValue {
   using Value::addMetadata;
   using Value::clearMetadata;
   using Value::eraseMetadata;
+  using Value::eraseMetadataIf;
   using Value::getAllMetadata;
   using Value::getMetadata;
   using Value::hasMetadata;
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 0211b5076131ce..fcd2ba838e7fd5 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -384,6 +384,9 @@ class Instruction : public User,
   void copyMetadata(const Instruction &SrcInst,
                     ArrayRef<unsigned> WL = ArrayRef<unsigned>());
 
+  /// Erase all metadata that matches the predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// If the instruction has "branch_weights" MD_prof metadata and the MDNode
   /// has three operands (including name string), swap the order of the
   /// metadata.
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index befb6975ca18d4..db1f44fea3b459 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -417,6 +417,8 @@ class ReplaceableMetadataImpl {
   /// is resolved.
   void resolveAllUses(bool ResolveUsers = true);
 
+  unsigned getNumUses() const { return UseMap.size(); }
+
 private:
   void addRef(void *Ref, OwnerTy Owner);
   void dropRef(void *Ref);
@@ -1243,6 +1245,11 @@ class MDNode : public Metadata {
   bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
   bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
 
+  unsigned getNumTemporaryUses() const {
+    assert(isTemporary() && "Only for temporaries");
+    return Context.getReplaceableUses()->getNumUses();
+  }
+
   /// RAUW a temporary.
   ///
   /// \pre \a isTemporary() must be \c true.
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 61f9d34eef3567..945081b77e9536 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -618,6 +618,9 @@ class Value {
   /// \returns true if any metadata was removed.
   bool eraseMetadata(unsigned KindID);
 
+  /// Erase all metadata attachments matching the given predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// Erase all metadata attached to this Value.
   void clearMetadata();
 
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index ea7f9a00d719fd..0b7bec45569d4b 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -13,8 +13,8 @@
 #include "llvm/AsmParser/LLParser.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/AsmParser/LLToken.h"
 #include "llvm/AsmParser/SlotMapping.h"
@@ -33,6 +33,7 @@
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -54,6 +55,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> AllowIncompleteIR(
+    "allow-incomplete-ir", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Allow incomplete IR on a best effort basis (references to unknown "
+        "metadata will be dropped)"));
+
 static std::string getTypeString(Type *T) {
   std::string Result;
   raw_string_ostream Tmp(Result);
@@ -123,6 +130,57 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
         std::make_pair(I.first, std::make_pair(I.second, LocTy())));
 }
 
+void LLParser::dropUnknownMetadataReferences() {
+  auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
+  for (Function &F : *M) {
+    F.eraseMetadataIf(Pred);
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : make_early_inc_range(BB)) {
+        I.eraseMetadataIf(Pred);
+
+        if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+          // If this is a white-listed intrinsic with an unknown metadata
+          // operand, drop it.
+          if (isa<DbgInfoIntrinsic>(II) ||
+              II->getIntrinsicID() ==
+                  Intrinsic::experimental_noalias_scope_decl) {
+            SmallVector<MetadataAsValue *> MVs;
+            for (Value *V : II->args()) {
+              if (auto *MV = dyn_cast<MetadataAsValue>(V))
+                if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
+                  if (MD->isTemporary())
+                    MVs.push_back(MV);
+            }
+
+            if (!MVs.empty()) {
+              assert(II->use_empty() && "Cannot have uses");
+              II->eraseFromParent();
+
+              // Also remove no longer used MetadataAsValue wrappers.
+              for (MetadataAsValue *MV : MVs) {
+                if (MV->use_empty())
+                  delete MV;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (GlobalVariable &GV : M->globals())
+    GV.eraseMetadataIf(Pred);
+
+  for (const auto &[ID, Info] : make_early_inc_range(ForwardRefMDNodes)) {
+    // Check whether there is only a single use left, which would be in our
+    // own NumberedMetadata.
+    if (Info.first->getNumTemporaryUses() == 1) {
+      NumberedMetadata.erase(ID);
+      ForwardRefMDNodes.erase(ID);
+    }
+  }
+}
+
 /// validateEndOfModule - Do final validity and basic correctness checks at the
 /// end of the module.
 bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
@@ -284,6 +342,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                  "use of undefined value '@" +
                      Twine(ForwardRefValIDs.begin()->first) + "'");
 
+  if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
+    dropUnknownMetadataReferences();
+
   if (!ForwardRefMDNodes.empty())
     return error(ForwardRefMDNodes.begin()->second.second,
                  "use of undefined metadata '!" +
@@ -297,10 +358,14 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
 
   for (auto *Inst : InstsWithTBAATag) {
     MDNode *MD = Inst->getMetadata(LLVMContext::MD_tbaa);
-    assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
-    auto *UpgradedMD = UpgradeTBAANode(*MD);
-    if (MD != UpgradedMD)
-      Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    // With incomplete IR, the tbaa metadata may have been dropped.
+    if (!AllowIncompleteIR)
+      assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+    if (MD) {
+      auto *UpgradedMD = UpgradeTBAANode(*MD);
+      if (MD != UpgradedMD)
+        Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    }
   }
 
   // Look for intrinsic functions and CallInst that need to be upgraded.  We use
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index eff4fd371a73ff..37017a222d4853 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1533,6 +1533,21 @@ bool Value::eraseMetadata(unsigned KindID) {
   return Changed;
 }
 
+void Value::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (!HasMetadata)
+    return;
+
+  auto &MetadataStore = getContext().pImpl->ValueMetadata;
+  MDAttachments &Info = MetadataStore.find(this)->second;
+  assert(!Info.empty() && "bit out of sync with hash table");
+  Info.remove_if([Pred](const MDAttachments::Attachment &I) {
+    return Pred(I.MDKind, I.Node);
+  });
+
+  if (Info.empty())
+    clearMetadata();
+}
+
 void Value::clearMetadata() {
   if (!HasMetadata)
     return;
@@ -1556,6 +1571,13 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
   return Value::getMetadata(KindID);
 }
 
+void Instruction::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (DbgLoc && Pred(LLVMContext::MD_dbg, DbgLoc.getAsMDNode()))
+    DbgLoc = {};
+
+  Value::eraseMetadataIf(Pred);
+}
+
 void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   if (!Value::hasMetadata())
     return; // Nothing to remove!
@@ -1566,17 +1588,9 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   // A DIAssignID attachment is debug metadata, don't drop it.
   KnownSet.insert(LLVMContext::MD_DIAssignID);
 
-  auto &MetadataStore = getContext().pImpl->ValueMetadata;
-  MDAttachments &Info = MetadataStore.find(this)->second;
-  assert(!Info.empty() && "bit out of sync with hash table");
-  Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
-    return !KnownSet.count(I.MDKind);
+  Value::eraseMetadataIf([&KnownSet](unsigned MDKind, MDNode *Node) {
+    return !KnownSet.count(MDKind);
   });
-
-  if (Info.empty()) {
-    // Drop our entry at the store.
-    clearMetadata();
-  }
 }
 
 void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {
diff --git a/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
new file mode 100644
index 00000000000000..e320472921ed9c
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -allow-incomplete-ir < %s 2>&1 | FileCheck %s
+
+; CHECK: error: use of undefined metadata '!1'
+define void @test(ptr %p) {
+  %v = load i8, ptr %p, !noalias !0
+  ret void
+}
+
+!0 = !{!1}
diff --git a/llvm/test/Assembler/incomplete-ir-metadata.ll b/llvm/test/Assembler/incomplete-ir-metadata.ll
new file mode 100644
index 00000000000000..fdf7b4ee0ebd23
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -allow-incomplete-ir < %s | FileCheck %s
+
+ at g = global i8 0, !exclude !4
+
+define void @test(ptr %p) !dbg !3 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V2:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V3:%.*]] = load i8, ptr [[P]], align 1, !noalias [[META0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i8, ptr %p, !noalias !0
+  %v2 = load i8, ptr %p, !tbaa !1
+  %v3 = load i8, ptr %p, !dbg !2, !noalias !100
+  call void @llvm.experimental.noalias.scope.decl(metadata !5)
+  call void @llvm.dbg.value(metadata i32 0, metadata !7, metadata !8)
+  call void @llvm.experimental.noalias.scope.decl(metadata !100)
+  ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!100 = !{!101}
+!101 = !{!101, !102}
+!102 = !{!102}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]]}
+;.

>From f1c0230a144d5041a0c5baf7b70d50de48b58162 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Fri, 19 Jan 2024 15:17:28 +0100
Subject: [PATCH 2/3] Move intrinsic dropping into a separate function

Allows us to use early exit and avoids very deeply nested code.
---
 llvm/lib/AsmParser/LLParser.cpp | 52 ++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 0b7bec45569d4b..f82d00a39498b4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -130,6 +130,30 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
         std::make_pair(I.first, std::make_pair(I.second, LocTy())));
 }
 
+static void dropIntrinsicWithUnknownMetadataArgument(IntrinsicInst *II) {
+  // White-list intrinsics that are safe to drop.
+  if (!isa<DbgInfoIntrinsic>(II) &&
+      II->getIntrinsicID() != Intrinsic::experimental_noalias_scope_decl)
+    return;
+
+  SmallVector<MetadataAsValue *> MVs;
+  for (Value *V : II->args())
+    if (auto *MV = dyn_cast<MetadataAsValue>(V))
+      if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
+        if (MD->isTemporary())
+          MVs.push_back(MV);
+
+  if (!MVs.empty()) {
+    assert(II->use_empty() && "Cannot have uses");
+    II->eraseFromParent();
+
+    // Also remove no longer used MetadataAsValue wrappers.
+    for (MetadataAsValue *MV : MVs)
+      if (MV->use_empty())
+        delete MV;
+  }
+}
+
 void LLParser::dropUnknownMetadataReferences() {
   auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
   for (Function &F : *M) {
@@ -138,32 +162,8 @@ void LLParser::dropUnknownMetadataReferences() {
       for (Instruction &I : make_early_inc_range(BB)) {
         I.eraseMetadataIf(Pred);
 
-        if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
-          // If this is a white-listed intrinsic with an unknown metadata
-          // operand, drop it.
-          if (isa<DbgInfoIntrinsic>(II) ||
-              II->getIntrinsicID() ==
-                  Intrinsic::experimental_noalias_scope_decl) {
-            SmallVector<MetadataAsValue *> MVs;
-            for (Value *V : II->args()) {
-              if (auto *MV = dyn_cast<MetadataAsValue>(V))
-                if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
-                  if (MD->isTemporary())
-                    MVs.push_back(MV);
-            }
-
-            if (!MVs.empty()) {
-              assert(II->use_empty() && "Cannot have uses");
-              II->eraseFromParent();
-
-              // Also remove no longer used MetadataAsValue wrappers.
-              for (MetadataAsValue *MV : MVs) {
-                if (MV->use_empty())
-                  delete MV;
-              }
-            }
-          }
-        }
+        if (auto *II = dyn_cast<IntrinsicInst>(&I))
+          dropIntrinsicWithUnknownMetadataArgument(II);
       }
     }
   }

>From 1efb97278661e93e27c5374db861f5a21a996121 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Fri, 19 Jan 2024 15:27:58 +0100
Subject: [PATCH 3/3] Use InstIterator

---
 llvm/lib/AsmParser/LLParser.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f82d00a39498b4..d6c5993797de11 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/GlobalIFunc.h"
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
@@ -158,13 +159,11 @@ void LLParser::dropUnknownMetadataReferences() {
   auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
   for (Function &F : *M) {
     F.eraseMetadataIf(Pred);
-    for (BasicBlock &BB : F) {
-      for (Instruction &I : make_early_inc_range(BB)) {
-        I.eraseMetadataIf(Pred);
+    for (Instruction &I : make_early_inc_range(instructions(F))) {
+      I.eraseMetadataIf(Pred);
 
-        if (auto *II = dyn_cast<IntrinsicInst>(&I))
-          dropIntrinsicWithUnknownMetadataArgument(II);
-      }
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        dropIntrinsicWithUnknownMetadataArgument(II);
     }
   }
 



More information about the llvm-commits mailing list