[llvm] [AsmParser] Add support for reading incomplete IR (part 1) (PR #78421)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 17 02:19:16 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Nikita Popov (nikic)

<details>
<summary>Changes</summary>

Add an `-allow-incomplete-ir` flag to the IR parser, which allows reading IR with missing declarations. This is intended to produce a best-effort interpretation of the IR, along the same lines as what we would manually do when taking, for example, a function from `-print-after-all` output and fixing it up to be valid IR.

This patch only supports dropping references to undeclared metadata, either by dropping metadata attachments from instructions/functions, or by dropping calls to certain intrinsics (like debug intrinsics). I will implement support for inserting missing function/global declarations in a follow-up patch.

We don't have real use lists for metadata, so the approach here is to iterate over the whole IR and identify metadata that needs to be dropped. This does not support all possible cases, but should handle anything that's relevant for the function-only IR use case.

---
Full diff: https://github.com/llvm/llvm-project/pull/78421.diff


9 Files Affected:

- (modified) llvm/include/llvm/AsmParser/LLParser.h (+1) 
- (modified) llvm/include/llvm/IR/GlobalObject.h (+1) 
- (modified) llvm/include/llvm/IR/Instruction.h (+3) 
- (modified) llvm/include/llvm/IR/Metadata.h (+7) 
- (modified) llvm/include/llvm/IR/Value.h (+3) 
- (modified) llvm/lib/AsmParser/LLParser.cpp (+70-5) 
- (modified) llvm/lib/IR/Metadata.cpp (+24-10) 
- (added) llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll (+9) 
- (added) llvm/test/Assembler/incomplete-ir-metadata.ll (+34) 


``````````diff
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 54bc3e582e01aec..9aea9167543a31b 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -328,6 +328,7 @@ namespace llvm {
 
     // Top-Level Entities
     bool parseTopLevelEntities();
+    void dropUnknownMetadataReferences();
     bool validateEndOfModule(bool UpgradeDebugInfo);
     bool validateEndOfIndex();
     bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index ae8e616824448bd..b6a974d8bb9f086 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -133,6 +133,7 @@ class GlobalObject : public GlobalValue {
   using Value::addMetadata;
   using Value::clearMetadata;
   using Value::eraseMetadata;
+  using Value::eraseMetadataIf;
   using Value::getAllMetadata;
   using Value::getMetadata;
   using Value::hasMetadata;
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 0211b5076131ce6..fcd2ba838e7fd51 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -384,6 +384,9 @@ class Instruction : public User,
   void copyMetadata(const Instruction &SrcInst,
                     ArrayRef<unsigned> WL = ArrayRef<unsigned>());
 
+  /// Erase all metadata that matches the predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// If the instruction has "branch_weights" MD_prof metadata and the MDNode
   /// has three operands (including name string), swap the order of the
   /// metadata.
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 4498423c4c460d9..6e51fd9290656ee 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -397,6 +397,8 @@ class ReplaceableMetadataImpl {
   /// is resolved.
   void resolveAllUses(bool ResolveUsers = true);
 
+  unsigned getNumUses() const { return UseMap.size(); }
+
 private:
   void addRef(void *Ref, OwnerTy Owner);
   void dropRef(void *Ref);
@@ -1221,6 +1223,11 @@ class MDNode : public Metadata {
 
   bool isReplaceable() const { return isTemporary(); }
 
+  unsigned getNumTemporaryUses() const {
+    assert(isTemporary() && "Only for temporaries");
+    return Context.getReplaceableUses()->getNumUses();
+  }
+
   /// RAUW a temporary.
   ///
   /// \pre \a isTemporary() must be \c true.
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 61f9d34eef35670..945081b77e95362 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -618,6 +618,9 @@ class Value {
   /// \returns true if any metadata was removed.
   bool eraseMetadata(unsigned KindID);
 
+  /// Erase all metadata attachments matching the given predicate.
+  void eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred);
+
   /// Erase all metadata attached to this Value.
   void clearMetadata();
 
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index fb9e1ba875e1fa2..12ad74cdda993b2 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -13,8 +13,8 @@
 #include "llvm/AsmParser/LLParser.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/AsmParser/LLToken.h"
 #include "llvm/AsmParser/SlotMapping.h"
@@ -33,6 +33,7 @@
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -54,6 +55,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> AllowIncompleteIR(
+    "allow-incomplete-ir", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Allow incomplete IR on a best effort basis (references to unknown "
+        "metadata will be dropped)"));
+
 static std::string getTypeString(Type *T) {
   std::string Result;
   raw_string_ostream Tmp(Result);
@@ -123,6 +130,57 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
         std::make_pair(I.first, std::make_pair(I.second, LocTy())));
 }
 
+void LLParser::dropUnknownMetadataReferences() {
+  auto Pred = [](unsigned MDKind, MDNode *Node) { return Node->isTemporary(); };
+  for (Function &F : *M) {
+    F.eraseMetadataIf(Pred);
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : make_early_inc_range(BB)) {
+        I.eraseMetadataIf(Pred);
+
+        if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+          // If this is a white-listed intrinsic with an unknown metadata
+          // operand, drop it.
+          if (isa<DbgInfoIntrinsic>(II) ||
+              II->getIntrinsicID() ==
+                  Intrinsic::experimental_noalias_scope_decl) {
+            SmallVector<MetadataAsValue *> MVs;
+            for (Value *V : II->args()) {
+              if (auto *MV = dyn_cast<MetadataAsValue>(V))
+                if (auto *MD = dyn_cast<MDNode>(MV->getMetadata()))
+                  if (MD->isTemporary())
+                    MVs.push_back(MV);
+            }
+
+            if (!MVs.empty()) {
+              assert(II->use_empty() && "Cannot have uses");
+              II->eraseFromParent();
+
+              // Also remove no longer used MetadataAsValue wrappers.
+              for (MetadataAsValue *MV : MVs) {
+                if (MV->use_empty())
+                  delete MV;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (GlobalVariable &GV : M->globals())
+    GV.eraseMetadataIf(Pred);
+
+  for (const auto &[ID, Info] : make_early_inc_range(ForwardRefMDNodes)) {
+    // Check whether there is only a single use left, which would be in our
+    // own NumberedMetadata.
+    if (Info.first->getNumTemporaryUses() == 1) {
+      NumberedMetadata.erase(ID);
+      ForwardRefMDNodes.erase(ID);
+    }
+  }
+}
+
 /// validateEndOfModule - Do final validity and basic correctness checks at the
 /// end of the module.
 bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
@@ -256,6 +314,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
                  "use of undefined value '@" +
                      Twine(ForwardRefValIDs.begin()->first) + "'");
 
+  if (AllowIncompleteIR && !ForwardRefMDNodes.empty())
+    dropUnknownMetadataReferences();
+
   if (!ForwardRefMDNodes.empty())
     return error(ForwardRefMDNodes.begin()->second.second,
                  "use of undefined metadata '!" +
@@ -269,10 +330,14 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
 
   for (auto *Inst : InstsWithTBAATag) {
     MDNode *MD = Inst->getMetadata(LLVMContext::MD_tbaa);
-    assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
-    auto *UpgradedMD = UpgradeTBAANode(*MD);
-    if (MD != UpgradedMD)
-      Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    // With incomplete IR, the tbaa metadata may have been dropped.
+    if (!AllowIncompleteIR)
+      assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+    if (MD) {
+      auto *UpgradedMD = UpgradeTBAANode(*MD);
+      if (MD != UpgradedMD)
+        Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+    }
   }
 
   // Look for intrinsic functions and CallInst that need to be upgraded.  We use
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 515893d079b8cb0..68ca3db1fb3a997 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1506,6 +1506,21 @@ bool Value::eraseMetadata(unsigned KindID) {
   return Changed;
 }
 
+void Value::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (!HasMetadata)
+    return;
+
+  auto &MetadataStore = getContext().pImpl->ValueMetadata;
+  MDAttachments &Info = MetadataStore.find(this)->second;
+  assert(!Info.empty() && "bit out of sync with hash table");
+  Info.remove_if([Pred](const MDAttachments::Attachment &I) {
+    return Pred(I.MDKind, I.Node);
+  });
+
+  if (Info.empty())
+    clearMetadata();
+}
+
 void Value::clearMetadata() {
   if (!HasMetadata)
     return;
@@ -1529,6 +1544,13 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
   return Value::getMetadata(KindID);
 }
 
+void Instruction::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
+  if (DbgLoc && Pred(LLVMContext::MD_dbg, DbgLoc.getAsMDNode()))
+    DbgLoc = {};
+
+  Value::eraseMetadataIf(Pred);
+}
+
 void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   if (!Value::hasMetadata())
     return; // Nothing to remove!
@@ -1539,17 +1561,9 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
   // A DIAssignID attachment is debug metadata, don't drop it.
   KnownSet.insert(LLVMContext::MD_DIAssignID);
 
-  auto &MetadataStore = getContext().pImpl->ValueMetadata;
-  MDAttachments &Info = MetadataStore.find(this)->second;
-  assert(!Info.empty() && "bit out of sync with hash table");
-  Info.remove_if([&KnownSet](const MDAttachments::Attachment &I) {
-    return !KnownSet.count(I.MDKind);
+  Value::eraseMetadataIf([&KnownSet](unsigned MDKind, MDNode *Node) {
+    return !KnownSet.count(MDKind);
   });
-
-  if (Info.empty()) {
-    // Drop our entry at the store.
-    clearMetadata();
-  }
 }
 
 void Instruction::updateDIAssignIDMapping(DIAssignID *ID) {
diff --git a/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
new file mode 100644
index 000000000000000..e320472921ed9c9
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata-unsupported.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -allow-incomplete-ir < %s 2>&1 | FileCheck %s
+
+; CHECK: error: use of undefined metadata '!1'
+define void @test(ptr %p) {
+  %v = load i8, ptr %p, !noalias !0
+  ret void
+}
+
+!0 = !{!1}
diff --git a/llvm/test/Assembler/incomplete-ir-metadata.ll b/llvm/test/Assembler/incomplete-ir-metadata.ll
new file mode 100644
index 000000000000000..fdf7b4ee0ebd231
--- /dev/null
+++ b/llvm/test/Assembler/incomplete-ir-metadata.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -allow-incomplete-ir < %s | FileCheck %s
+
+@g = global i8 0, !exclude !4
+
+define void @test(ptr %p) !dbg !3 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V2:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[V3:%.*]] = load i8, ptr [[P]], align 1, !noalias [[META0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i8, ptr %p, !noalias !0
+  %v2 = load i8, ptr %p, !tbaa !1
+  %v3 = load i8, ptr %p, !dbg !2, !noalias !100
+  call void @llvm.experimental.noalias.scope.decl(metadata !5)
+  call void @llvm.dbg.value(metadata i32 0, metadata !7, metadata !8)
+  call void @llvm.experimental.noalias.scope.decl(metadata !100)
+  ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!100 = !{!101}
+!101 = !{!101, !102}
+!102 = !{!102}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]]}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/78421


More information about the llvm-commits mailing list