[llvm] df1a74a - [IR] Support importing modules with invalid data layouts.

Jannik Silvanus via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 12 01:12:37 PST 2023


Author: Jannik Silvanus
Date: 2023-01-12T10:10:45+01:00
New Revision: df1a74ac3c6407d0658c46c859c4a07974af3293

URL: https://github.com/llvm/llvm-project/commit/df1a74ac3c6407d0658c46c859c4a07974af3293
DIFF: https://github.com/llvm/llvm-project/commit/df1a74ac3c6407d0658c46c859c4a07974af3293.diff

LOG: [IR] Support importing modules with invalid data layouts.

Use the existing mechanism to change the data layout using callbacks.

Before this patch, we had a callback type DataLayoutCallbackTy that receives
a single StringRef specifying the target triple, and optionally returns
the data layout string to be used. Module loaders (both IR and BC) then
apply the callback to potentially override the module's data layout,
after first having imported and parsed the data layout from the file.

We can't do the same to fix invalid data layouts, because the import will already
fail, before the callback has a chance to fix it.
Instead, module loaders now tentatively parse the data layout into a string,
wait until the target triple has been parsed, apply the override callback
to the imported string and only then parse the tentative string as a data layout.

Moreover, add the old data layout string S as second argument to the callback,
in addition to the already existing target triple argument.
S is either the default data layout string in case none is specified, or the data
layout string specified in the module, possibly after auto-upgrades (for the BitcodeReader).
This allows callbacks to inspect the old data layout string,
and fix it instead of setting a fixed data layout.

Also allow to pass data layout override callbacks to lazy bitcode module
loader functions.

Differential Revision: https://reviews.llvm.org/D140985

Added: 
    llvm/test/Assembler/invalid-datalayout-override.ll

Modified: 
    llvm/include/llvm/AsmParser/LLParser.h
    llvm/include/llvm/AsmParser/Parser.h
    llvm/include/llvm/Bitcode/BitcodeReader.h
    llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
    llvm/include/llvm/IRReader/IRReader.h
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/AsmParser/Parser.cpp
    llvm/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/lib/CodeGen/MIRParser/MIRParser.cpp
    llvm/test/Bitcode/function-default-address-spaces.ll
    llvm/tools/llc/llc.cpp
    llvm/tools/llvm-as/llvm-as.cpp
    llvm/tools/llvm-reduce/ReducerWorkItem.cpp
    llvm/tools/opt/opt.cpp
    llvm/unittests/AsmParser/AsmParserTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 2f39803816330..b07e9fc9cc755 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -179,8 +179,10 @@ namespace llvm {
           Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots),
           BlockAddressPFS(nullptr) {}
     bool Run(
-        bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback =
-                                   [](StringRef) { return std::nullopt; });
+        bool UpgradeDebugInfo,
+        DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
+          return std::nullopt;
+        });
 
     bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
 
@@ -318,8 +320,8 @@ namespace llvm {
     bool parseTopLevelEntities();
     bool validateEndOfModule(bool UpgradeDebugInfo);
     bool validateEndOfIndex();
-    bool parseTargetDefinitions();
-    bool parseTargetDefinition();
+    bool parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback);
+    bool parseTargetDefinition(std::string &TentativeDLStr, LocTy &DLStrLoc);
     bool parseModuleAsm();
     bool parseSourceFileName();
     bool parseUnnamedType();

diff  --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h
index 5fbb94dcf6236..c57e7abe554db 100644
--- a/llvm/include/llvm/AsmParser/Parser.h
+++ b/llvm/include/llvm/AsmParser/Parser.h
@@ -29,7 +29,7 @@ struct SlotMapping;
 class SMDiagnostic;
 class Type;
 
-typedef llvm::function_ref<std::optional<std::string>(StringRef)>
+typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
     DataLayoutCallbackTy;
 
 /// This function is a main interface to the LLVM Assembly Parser. It parses
@@ -86,7 +86,7 @@ struct ParsedModuleAndIndex {
 ParsedModuleAndIndex parseAssemblyFileWithIndex(
     StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
     SlotMapping *Slots = nullptr,
-    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+    DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
       return std::nullopt;
     });
 
@@ -127,7 +127,7 @@ parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
 std::unique_ptr<Module> parseAssembly(
     MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
     SlotMapping *Slots = nullptr,
-    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+    DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
       return std::nullopt;
     });
 
@@ -169,7 +169,7 @@ parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err);
 bool parseAssemblyInto(
     MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err,
     SlotMapping *Slots = nullptr,
-    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+    DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
       return std::nullopt;
     });
 

diff  --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 6b4e0278954ae..a4b300d019a3a 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -34,7 +34,11 @@ class Module;
 class MemoryBuffer;
 class ModuleSummaryIndex;
 
-typedef llvm::function_ref<std::optional<std::string>(StringRef)>
+// Callback to override the data layout string of an imported bitcode module.
+// The first argument is the target triple, the second argument the data layout
+// string from the input, or a default string. It will be used if the callback
+// returns std::nullopt.
+typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
     DataLayoutCallbackTy;
 
   // These functions are for converting Expected/Error values to
@@ -101,14 +105,18 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
     /// If IsImporting is true, this module is being parsed for ThinLTO
     /// importing into another module.
-    Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
-                                                    bool ShouldLazyLoadMetadata,
-                                                    bool IsImporting);
+    Expected<std::unique_ptr<Module>> getLazyModule(
+        LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting,
+        DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
+          return std::nullopt;
+        });
 
     /// Read the entire bitcode module and return it.
     Expected<std::unique_ptr<Module>> parseModule(
-        LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
-                                  [](StringRef) { return std::nullopt; });
+        LLVMContext &Context,
+        DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
+          return std::nullopt;
+        });
 
     /// Returns information about the module to be used for LTO: whether to
     /// compile with ThinLTO, and whether it has a summary.
@@ -145,10 +153,12 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
   /// lazily load metadata as well. If IsImporting is true, this module is
   /// being parsed for ThinLTO importing into another module.
-  Expected<std::unique_ptr<Module>>
-  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
-                       bool ShouldLazyLoadMetadata = false,
-                       bool IsImporting = false);
+  Expected<std::unique_ptr<Module>> getLazyBitcodeModule(
+      MemoryBufferRef Buffer, LLVMContext &Context,
+      bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
+      DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
+        return std::nullopt;
+      });
 
   /// Like getLazyBitcodeModule, except that the module takes ownership of
   /// the memory buffer if successful. If successful, this moves Buffer. On
@@ -175,7 +185,7 @@ typedef llvm::function_ref<std::optional<std::string>(StringRef)>
   /// Read the specified bitcode file, returning the module.
   Expected<std::unique_ptr<Module>> parseBitcodeFile(
       MemoryBufferRef Buffer, LLVMContext &Context,
-      DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+      DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
         return std::nullopt;
       });
 

diff  --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index 1f94f7722602a..e1606e7c0ea72 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -34,7 +34,7 @@ class MachineModuleInfo;
 class SMDiagnostic;
 class StringRef;
 
-typedef llvm::function_ref<std::optional<std::string>(StringRef)>
+typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
     DataLayoutCallbackTy;
 
 /// This class initializes machine functions by applying the state loaded from
@@ -52,9 +52,8 @@ class MIRParser {
   /// A new, empty module is created if the LLVM IR isn't present.
   /// \returns nullptr if a parsing error occurred.
   std::unique_ptr<Module>
-  parseIRModule(DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
-    return std::nullopt;
-  });
+  parseIRModule(DataLayoutCallbackTy DataLayoutCallback =
+                    [](StringRef, StringRef) { return std::nullopt; });
 
   /// Parses MachineFunctions in the MIR file and add them to the given
   /// MachineModuleInfo \p MMI.

diff  --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h
index 64ddba382b4f3..4eabf473ce53d 100644
--- a/llvm/include/llvm/IRReader/IRReader.h
+++ b/llvm/include/llvm/IRReader/IRReader.h
@@ -27,7 +27,7 @@ class Module;
 class SMDiagnostic;
 class LLVMContext;
 
-typedef llvm::function_ref<std::optional<std::string>(StringRef)>
+typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
     DataLayoutCallbackTy;
 
 /// If the given MemoryBuffer holds a bitcode image, return a Module
@@ -55,7 +55,7 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
 /// \param DataLayoutCallback Override datalayout in the llvm assembly.
 std::unique_ptr<Module> parseIR(
     MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context,
-    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+    DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
       return std::nullopt;
     });
 
@@ -65,7 +65,7 @@ std::unique_ptr<Module> parseIR(
 /// \param DataLayoutCallback Override datalayout in the llvm assembly.
 std::unique_ptr<Module> parseIRFile(
     StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
-    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+    DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
       return std::nullopt;
     });
 }

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 435f4bf9620d6..8cdf3069740e4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -95,11 +95,8 @@ bool LLParser::Run(bool UpgradeDebugInfo,
         "Can't read textual IR with a Context that discards named Values");
 
   if (M) {
-    if (parseTargetDefinitions())
+    if (parseTargetDefinitions(DataLayoutCallback))
       return true;
-
-    if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
-      M->setDataLayout(*LayoutOverride);
   }
 
   return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) ||
@@ -353,11 +350,19 @@ bool LLParser::validateEndOfIndex() {
 // Top-Level Entities
 //===----------------------------------------------------------------------===//
 
-bool LLParser::parseTargetDefinitions() {
-  while (true) {
+bool LLParser::parseTargetDefinitions(DataLayoutCallbackTy DataLayoutCallback) {
+  // Delay parsing of the data layout string until the target triple is known.
+  // Then, pass both the the target triple and the tentative data layout string
+  // to DataLayoutCallback, allowing to override the DL string.
+  // This enables importing modules with invalid DL strings.
+  std::string TentativeDLStr = M->getDataLayoutStr();
+  LocTy DLStrLoc;
+
+  bool Done = false;
+  while (!Done) {
     switch (Lex.getKind()) {
     case lltok::kw_target:
-      if (parseTargetDefinition())
+      if (parseTargetDefinition(TentativeDLStr, DLStrLoc))
         return true;
       break;
     case lltok::kw_source_filename:
@@ -365,9 +370,21 @@ bool LLParser::parseTargetDefinitions() {
         return true;
       break;
     default:
-      return false;
+      Done = true;
     }
   }
+  // Run the override callback to potentially change the data layout string, and
+  // parse the data layout string.
+  if (auto LayoutOverride =
+          DataLayoutCallback(M->getTargetTriple(), TentativeDLStr)) {
+    TentativeDLStr = *LayoutOverride;
+    DLStrLoc = {};
+  }
+  Expected<DataLayout> MaybeDL = DataLayout::parse(TentativeDLStr);
+  if (!MaybeDL)
+    return error(DLStrLoc, toString(MaybeDL.takeError()));
+  M->setDataLayout(MaybeDL.get());
+  return false;
 }
 
 bool LLParser::parseTopLevelEntities() {
@@ -471,7 +488,8 @@ bool LLParser::parseModuleAsm() {
 /// toplevelentity
 ///   ::= 'target' 'triple' '=' STRINGCONSTANT
 ///   ::= 'target' 'datalayout' '=' STRINGCONSTANT
-bool LLParser::parseTargetDefinition() {
+bool LLParser::parseTargetDefinition(std::string &TentativeDLStr,
+                                     LocTy &DLStrLoc) {
   assert(Lex.getKind() == lltok::kw_target);
   std::string Str;
   switch (Lex.Lex()) {
@@ -488,13 +506,9 @@ bool LLParser::parseTargetDefinition() {
     Lex.Lex();
     if (parseToken(lltok::equal, "expected '=' after target datalayout"))
       return true;
-    LocTy Loc = Lex.getLoc();
-    if (parseStringConstant(Str))
+    DLStrLoc = Lex.getLoc();
+    if (parseStringConstant(TentativeDLStr))
       return true;
-    Expected<DataLayout> MaybeDL = DataLayout::parse(Str);
-    if (!MaybeDL)
-      return error(Loc, toString(MaybeDL.takeError()));
-    M->setDataLayout(MaybeDL.get());
     return false;
   }
 }

diff  --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 5df7789955dc2..035eea81378e5 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -91,9 +91,10 @@ ParsedModuleAndIndex llvm::parseAssemblyWithIndex(MemoryBufferRef F,
                                                   SMDiagnostic &Err,
                                                   LLVMContext &Context,
                                                   SlotMapping *Slots) {
-  return ::parseAssemblyWithIndex(F, Err, Context, Slots,
-                                  /*UpgradeDebugInfo*/ true,
-                                  [](StringRef) { return std::nullopt; });
+  return ::parseAssemblyWithIndex(
+      F, Err, Context, Slots,
+      /*UpgradeDebugInfo*/ true,
+      [](StringRef, StringRef) { return std::nullopt; });
 }
 
 static ParsedModuleAndIndex
@@ -150,7 +151,7 @@ static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F,
   // index, but we need to initialize it.
   LLVMContext unusedContext;
   return LLParser(F.getBuffer(), SM, Err, nullptr, &Index, unusedContext)
-      .Run(true, [](StringRef) { return std::nullopt; });
+      .Run(true, [](StringRef, StringRef) { return std::nullopt; });
 }
 
 std::unique_ptr<ModuleSummaryIndex>

diff  --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index cf5027dd2c569..aa33006dd5963 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -821,7 +821,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
   Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
   Error parseModule(
       uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false,
-      DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
+      DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
         return std::nullopt;
       });
 
@@ -4192,21 +4192,35 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
   // Parts of bitcode parsing depend on the datalayout.  Make sure we
   // finalize the datalayout before we run any of that code.
   bool ResolvedDataLayout = false;
-  auto ResolveDataLayout = [&] {
+  // In order to support importing modules with illegal data layout strings,
+  // delay parsing the data layout string until after upgrades and overrides
+  // have been applied, allowing to fix illegal data layout strings.
+  // Initialize to the current module's layout string in case none is specified.
+  std::string TentativeDataLayoutStr = TheModule->getDataLayoutStr();
+
+  auto ResolveDataLayout = [&]() -> Error {
     if (ResolvedDataLayout)
-      return;
+      return Error::success();
 
-    // datalayout and triple can't be parsed after this point.
+    // Datalayout and triple can't be parsed after this point.
     ResolvedDataLayout = true;
 
-    // Upgrade data layout string.
-    std::string DL = llvm::UpgradeDataLayoutString(
-        TheModule->getDataLayoutStr(), TheModule->getTargetTriple());
-    TheModule->setDataLayout(DL);
+    // Auto-upgrade the layout string
+    TentativeDataLayoutStr = llvm::UpgradeDataLayoutString(
+        TentativeDataLayoutStr, TheModule->getTargetTriple());
+
+    // Apply override
+    if (auto LayoutOverride = DataLayoutCallback(TheModule->getTargetTriple(),
+                                                 TentativeDataLayoutStr))
+      TentativeDataLayoutStr = *LayoutOverride;
+
+    // Now the layout string is finalized in TentativeDataLayoutStr. Parse it.
+    Expected<DataLayout> MaybeDL = DataLayout::parse(TentativeDataLayoutStr);
+    if (!MaybeDL)
+      return MaybeDL.takeError();
 
-    if (auto LayoutOverride =
-            DataLayoutCallback(TheModule->getTargetTriple()))
-      TheModule->setDataLayout(*LayoutOverride);
+    TheModule->setDataLayout(MaybeDL.get());
+    return Error::success();
   };
 
   // Read all the records for this module.
@@ -4220,7 +4234,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
     case BitstreamEntry::Error:
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
-      ResolveDataLayout();
+      if (Error Err = ResolveDataLayout())
+        return Err;
       return globalCleanup();
 
     case BitstreamEntry::SubBlock:
@@ -4285,7 +4300,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
           return Err;
         break;
       case bitc::FUNCTION_BLOCK_ID:
-        ResolveDataLayout();
+        if (Error Err = ResolveDataLayout())
+          return Err;
 
         // If this is the first function body we've seen, reverse the
         // FunctionsWithBodies list.
@@ -4384,13 +4400,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
     case bitc::MODULE_CODE_DATALAYOUT: {  // DATALAYOUT: [strchr x N]
       if (ResolvedDataLayout)
         return error("datalayout too late in module");
-      std::string S;
-      if (convertToString(Record, 0, S))
+      if (convertToString(Record, 0, TentativeDataLayoutStr))
         return error("Invalid record");
-      Expected<DataLayout> MaybeDL = DataLayout::parse(S);
-      if (!MaybeDL)
-        return MaybeDL.takeError();
-      TheModule->setDataLayout(MaybeDL.get());
       break;
     }
     case bitc::MODULE_CODE_ASM: {  // ASM: [strchr x N]
@@ -4436,7 +4447,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
         return Err;
       break;
     case bitc::MODULE_CODE_FUNCTION:
-      ResolveDataLayout();
+      if (Error Err = ResolveDataLayout())
+        return Err;
       if (Error Err = parseFunctionRecord(Record))
         return Err;
       break;
@@ -4465,6 +4477,7 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
     }
     Record.clear();
   }
+  return Error::success();
 }
 
 Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata,
@@ -7946,9 +7959,10 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
 
 Expected<std::unique_ptr<Module>>
 BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
-                             bool IsImporting) {
+                             bool IsImporting,
+                             DataLayoutCallbackTy DataLayoutCallback) {
   return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting,
-                       [](StringRef) { return std::nullopt; });
+                       DataLayoutCallback);
 }
 
 // Parse the specified bitcode buffer and merge the index into CombinedIndex.
@@ -8094,12 +8108,14 @@ static Expected<BitcodeModule> getSingleModule(MemoryBufferRef Buffer) {
 
 Expected<std::unique_ptr<Module>>
 llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
-                           bool ShouldLazyLoadMetadata, bool IsImporting) {
+                           bool ShouldLazyLoadMetadata, bool IsImporting,
+                           DataLayoutCallbackTy DataLayoutCallback) {
   Expected<BitcodeModule> BM = getSingleModule(Buffer);
   if (!BM)
     return BM.takeError();
 
-  return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting);
+  return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting,
+                           DataLayoutCallback);
 }
 
 Expected<std::unique_ptr<Module>> llvm::getOwningLazyBitcodeModule(

diff  --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 1cc3cfa169400..f13116e169f35 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -234,7 +234,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
     // Create an empty module when the MIR file is empty.
     NoMIRDocuments = true;
     auto M = std::make_unique<Module>(Filename, Context);
-    if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+    if (auto LayoutOverride =
+            DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
       M->setDataLayout(*LayoutOverride);
     return M;
   }
@@ -257,7 +258,8 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
   } else {
     // Create an new, empty module.
     M = std::make_unique<Module>(Filename, Context);
-    if (auto LayoutOverride = DataLayoutCallback(M->getTargetTriple()))
+    if (auto LayoutOverride =
+            DataLayoutCallback(M->getTargetTriple(), M->getDataLayoutStr()))
       M->setDataLayout(*LayoutOverride);
     NoLLVMIR = true;
   }

diff  --git a/llvm/test/Assembler/invalid-datalayout-override.ll b/llvm/test/Assembler/invalid-datalayout-override.ll
new file mode 100644
index 0000000000000..f62777c6c5328
--- /dev/null
+++ b/llvm/test/Assembler/invalid-datalayout-override.ll
@@ -0,0 +1,7 @@
+; Check that importing this file gives an error due to the broken data layout:
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; Check that specifying a valid data layout allows to import this file:
+; RUN: llvm-as -data-layout "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" < %s
+
+target datalayout = "A16777216"
+; CHECK: Invalid address space, must be a 24-bit integer

diff  --git a/llvm/test/Bitcode/function-default-address-spaces.ll b/llvm/test/Bitcode/function-default-address-spaces.ll
index 72b727924b426..3aae5db863b88 100644
--- a/llvm/test/Bitcode/function-default-address-spaces.ll
+++ b/llvm/test/Bitcode/function-default-address-spaces.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as %s  -o - | llvm-dis - | FileCheck %s -check-prefixes CHECK,PROG-AS0
 ; RUN: llvm-as -data-layout "P200" %s  -o - | llvm-dis | FileCheck %s -check-prefixes CHECK,PROG-AS200
-; RUN: not --crash llvm-as -data-layout "P123456789" %s -o /dev/null 2>&1 | FileCheck %s -check-prefix BAD-DATALAYOUT
-; BAD-DATALAYOUT: LLVM ERROR: Invalid address space, must be a 24-bit integer
+; RUN: not llvm-as -data-layout "P123456789" %s -o /dev/null 2>&1 | FileCheck %s -check-prefix BAD-DATALAYOUT
+; BAD-DATALAYOUT: error: Invalid address space, must be a 24-bit integer
 
 ; PROG-AS0-NOT: target datalayout
 ; PROG-AS200: target datalayout = "P200"

diff  --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index c871d55314886..d59b128b876d1 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -530,8 +530,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
 
   // If user just wants to list available options, skip module loading
   if (!SkipModule) {
-    auto SetDataLayout =
-        [&](StringRef DataLayoutTargetTriple) -> std::optional<std::string> {
+    auto SetDataLayout = [&](StringRef DataLayoutTargetTriple,
+                             StringRef OldDLStr) -> std::optional<std::string> {
       // If we are supposed to override the target triple, do so now.
       std::string IRTargetTriple = DataLayoutTargetTriple.str();
       if (!TargetTriple.empty())

diff  --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index e6d4780569c31..ef1c50fc2284e 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -121,7 +121,7 @@ int main(int argc, char **argv) {
 
   // Parse the file now...
   SMDiagnostic Err;
-  auto SetDataLayout = [](StringRef) -> std::optional<std::string> {
+  auto SetDataLayout = [](StringRef, StringRef) -> std::optional<std::string> {
     if (ClDataLayout.empty())
       return std::nullopt;
     return ClDataLayout;

diff  --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
index e86c2ad7294c9..a608f6f3b09f8 100644
--- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
@@ -407,8 +407,8 @@ parseReducerWorkItem(StringRef ToolName, StringRef Filename, LLVMContext &Ctxt,
     std::unique_ptr<MIRParser> MParser =
         createMIRParser(std::move(FileOrErr.get()), Ctxt);
 
-    auto SetDataLayout =
-        [&](StringRef DataLayoutTargetTriple) -> std::optional<std::string> {
+    auto SetDataLayout = [&](StringRef DataLayoutTargetTriple,
+                             StringRef OldDLStr) -> std::optional<std::string> {
       // If we are supposed to override the target triple, do so now.
       std::string IRTargetTriple = DataLayoutTargetTriple.str();
       if (!TargetTriple.empty())

diff  --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index aa1fe4090f71a..392f0603ab968 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -534,7 +534,7 @@ int main(int argc, char **argv) {
   std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
 
   // Load the input module...
-  auto SetDataLayout = [](StringRef) -> std::optional<std::string> {
+  auto SetDataLayout = [](StringRef, StringRef) -> std::optional<std::string> {
     if (ClDataLayout.empty())
       return std::nullopt;
     return ClDataLayout;

diff  --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 23a0dedd78c02..ee6cd709cf651 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -10,8 +10,10 @@
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 
@@ -380,4 +382,39 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 4);
 }
 
+TEST(AsmParserTest, InvalidDataLayoutStringCallback) {
+  LLVMContext Ctx;
+  SMDiagnostic Error;
+  // Note the invalid i8:7 part
+  // Overalign i32 as marker so we can check that indeed this DL was used,
+  // and not some default.
+  StringRef InvalidDLStr =
+      "e-m:e-p:64:64-i8:7-i16:16-i32:64-i64:64-f80:128-n8:16:32:64";
+  StringRef FixedDLStr =
+      "e-m:e-p:64:64-i8:8-i16:16-i32:64-i64:64-f80:128-n8:16:32:64";
+  Expected<DataLayout> ExpectedFixedDL = DataLayout::parse(FixedDLStr);
+  ASSERT_TRUE(!ExpectedFixedDL.takeError());
+  DataLayout FixedDL = ExpectedFixedDL.get();
+  std::string Source = ("target datalayout = \"" + InvalidDLStr + "\"\n").str();
+  MemoryBufferRef SourceBuffer(Source, "<string>");
+
+  // Check that we reject the source without a DL override.
+  SlotMapping Mapping1;
+  auto Mod1 = parseAssembly(SourceBuffer, Error, Ctx, &Mapping1);
+  EXPECT_TRUE(Mod1 == nullptr);
+
+  // Check that we pass the correct DL str to the callback,
+  // that fixing the DL str from the callback works,
+  // and that the resulting module has the correct DL.
+  SlotMapping Mapping2;
+  auto Mod2 = parseAssembly(
+      SourceBuffer, Error, Ctx, &Mapping2,
+      [&](StringRef Triple, StringRef DLStr) -> std::optional<std::string> {
+        EXPECT_EQ(DLStr, InvalidDLStr);
+        return std::string{FixedDLStr};
+      });
+  ASSERT_TRUE(Mod2 != nullptr);
+  EXPECT_EQ(Mod2->getDataLayout(), FixedDL);
+}
+
 } // end anonymous namespace


        


More information about the llvm-commits mailing list