[clang] [llvm] IR: Add module-level attribute language-standard (PR #93159)

YunQiang Su via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 02:40:34 PDT 2024


https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/93159

The backend may be interested in the language-standard level of the source file. Let's pass it to the IR.

In an .ll file, it looks like
   language_standard = "gnu17"

>From 3e413bae6ba18276e4d47075d48d656e4f732b5e Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at gcc.gnu.org>
Date: Thu, 23 May 2024 17:33:34 +0800
Subject: [PATCH] IR: Add module-level attribute language-standard

The backend may be interested in the language-standard level
of the source file. Let's pass it to the IR.

In an .ll file, it looks like
   language_standard = "gnu17"
---
 clang/lib/CodeGen/ModuleBuilder.cpp       |  3 +++
 llvm/include/llvm/Bitcode/LLVMBitCodes.h  |  3 +++
 llvm/include/llvm/IR/Module.h             |  7 +++++++
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 22 +++++++++++++++++++++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 23 +++++++++++++++++++++++
 llvm/lib/IR/AsmWriter.cpp                 |  6 ++++++
 llvm/lib/IR/Module.cpp                    |  4 ++--
 7 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index df85295cfb2e2..cf07b09b93fd7 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -153,6 +153,9 @@ namespace {
 
       M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
       M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
+      M->setLanguageStandard(
+          LangStandard::getLangStandardForKind(Ctx->getLangOpts().LangStd)
+              .getName());
       const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion();
       if (!SDKVersion.empty())
         M->setSDKVersion(SDKVersion);
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 909eb833c601a..cba77c322fdb4 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -120,6 +120,9 @@ enum ModuleCodes {
 
   // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility]
   MODULE_CODE_IFUNC = 18,
+
+  // LANGUAGE_STANDARD: [strchar x N]
+  MODULE_CODE_LANGUAGE_STANDARD = 19,
 };
 
 /// PARAMATTR blocks have code for defining a parameter attribute set.
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 6135e15fd030f..300dc5c5630dc 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -196,6 +196,7 @@ class LLVM_EXTERNAL_VISIBILITY Module {
                                   ///< recorded in bitcode.
   std::string TargetTriple;       ///< Platform target triple Module compiled on
                                   ///< Format: (arch)(sub)-(vendor)-(sys0-(abi)
+  std::string LanguageStandard;   ///< Language standard: c89/c99/c11/c23 etc.
   NamedMDSymTabType NamedMDSymTab;  ///< NamedMDNode names.
   DataLayout DL;                  ///< DataLayout associated with the module
   StringMap<unsigned>
@@ -277,6 +278,9 @@ class LLVM_EXTERNAL_VISIBILITY Module {
   /// contain the source file name.
   const std::string &getSourceFileName() const { return SourceFileName; }
 
+  /// Get the module's language standard.
+  const std::string &getLanguageStandard() const { return LanguageStandard; }
+
   /// Get a short "name" for the module.
   ///
   /// This is useful for debugging or logging. It is essentially a convenience
@@ -339,6 +343,9 @@ class LLVM_EXTERNAL_VISIBILITY Module {
   /// Set the target triple.
   void setTargetTriple(StringRef T) { TargetTriple = std::string(T); }
 
+  /// Set the language standard.
+  void setLanguageStandard(StringRef S) { LanguageStandard = std::string(S); }
+
   /// Set the module-scope inline assembly blocks.
   /// A trailing newline is added if the input doesn't have one.
   void setModuleInlineAsm(StringRef Asm) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c9295344f8080..8078291477886 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -946,6 +946,9 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// Original source file name recorded in a bitcode record.
   std::string SourceFileName;
 
+  /// Original language standard recorded in a bitcode record.
+  std::string LanguageStandard;
+
   /// The string identifier given to this module by the client, normally the
   /// path to the bitcode file.
   StringRef ModulePath;
@@ -4613,13 +4616,22 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
       VSTOffset = Record[0] - 1;
       break;
     /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
-    case bitc::MODULE_CODE_SOURCE_FILENAME:
+    case bitc::MODULE_CODE_SOURCE_FILENAME: {
       SmallString<128> ValueName;
       if (convertToString(Record, 0, ValueName))
         return error("Invalid record");
       TheModule->setSourceFileName(ValueName);
       break;
     }
+    /// MODULE_CODE_LANGUAGE_STANDARD: [strchar x N]
+    case bitc::MODULE_CODE_LANGUAGE_STANDARD: {
+      SmallString<128> ValueLangStd;
+      if (convertToString(Record, 0, ValueLangStd))
+        return error("Invalid record");
+      TheModule->setLanguageStandard(ValueLangStd);
+      break;
+    }
+    }
     Record.clear();
   }
   this->ValueTypeCallback = std::nullopt;
@@ -7227,6 +7239,14 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
           SourceFileName = ValueName.c_str();
           break;
         }
+        /// MODULE_CODE_LANGUAGE_STANDARD: [strchar x N]
+        case bitc::MODULE_CODE_LANGUAGE_STANDARD: {
+          SmallString<128> ValueName;
+          if (convertToString(Record, 0, ValueName))
+            return error("Invalid record");
+          LanguageStandard = ValueName.c_str();
+          break;
+        }
         /// MODULE_CODE_HASH: [5*i32]
         case bitc::MODULE_CODE_HASH: {
           if (Record.size() != 5)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index c4cea3d6eef2d..ef39c31bae416 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1484,6 +1484,29 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
     Vals.clear();
   }
+  // Emit the module's language standard.
+  {
+    StringEncoding Bits = getStringEncoding(M.getLanguageStandard());
+    BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
+    if (Bits == SE_Char6)
+      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
+    else if (Bits == SE_Fixed7)
+      AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
+
+    // MODULE_CODE_LANGUAGE_STANDARD: [strchar x N]
+    auto Abbv = std::make_shared<BitCodeAbbrev>();
+    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_LANGUAGE_STANDARD));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(AbbrevOpToUse);
+    unsigned LangStdAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+    for (const auto P : M.getLanguageStandard())
+      Vals.push_back((unsigned char)P);
+
+    // Emit the finished record.
+    Stream.EmitRecord(bitc::MODULE_CODE_LANGUAGE_STANDARD, Vals, LangStdAbbrev);
+    Vals.clear();
+  }
 
   // Emit the global variable information.
   for (const GlobalVariable &GV : M.globals()) {
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 941f6a7a7d823..8f583691b51f0 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2928,6 +2928,12 @@ void AssemblyWriter::printModule(const Module *M) {
   if (!M->getTargetTriple().empty())
     Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
 
+  if (!M->getLanguageStandard().empty()) {
+    Out << "language_standard = \"";
+    printEscapedString(M->getLanguageStandard(), Out);
+    Out << "\"\n";
+  }
+
   if (!M->getModuleInlineAsm().empty()) {
     Out << '\n';
 
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index a8696ed9e3ce5..1d7395b284339 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -71,8 +71,8 @@ template class llvm::SymbolTableListTraits<GlobalIFunc>;
 
 Module::Module(StringRef MID, LLVMContext &C)
     : Context(C), ValSymTab(std::make_unique<ValueSymbolTable>(-1)),
-      ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL(""),
-      IsNewDbgInfoFormat(false) {
+      ModuleID(std::string(MID)), SourceFileName(std::string(MID)),
+      LanguageStandard(""), DL(""), IsNewDbgInfoFormat(false) {
   Context.addModule(this);
 }
 



More information about the llvm-commits mailing list