[llvm] [llvm][IR] Add per-global code model attribute (PR #72077)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 1 04:48:59 PST 2023
https://github.com/heiher updated https://github.com/llvm/llvm-project/pull/72077
From 23891c2c7b4c6ee7771522f468a22580848d17a0 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Fri, 10 Nov 2023 21:07:43 -0600
Subject: [PATCH 1/2] [llvm][IR] Add per-global code model attribute
This adds a per-global code model attribute, which can override
the target's code model to access global variables.
Link: https://discourse.llvm.org/t/how-to-best-implement-code-model-overriding-for-certain-values/71816
Link: https://discourse.llvm.org/t/rfc-add-per-global-code-model-attribute/74944
Suggested-by: Arthur Eubanks <aeubanks at google.com>
Signed-off-by: WANG Rui <wangrui at loongson.cn>
---
llvm/docs/LangRef.rst | 11 +++++++
llvm/include/llvm/AsmParser/LLParser.h | 1 +
llvm/include/llvm/AsmParser/LLToken.h | 1 +
llvm/include/llvm/IR/GlobalObject.h | 27 +++++++++++++++++
llvm/lib/AsmParser/LLLexer.cpp | 1 +
llvm/lib/AsmParser/LLParser.cpp | 29 +++++++++++++++++++
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 25 ++++++++++++++++
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 5 ++--
llvm/lib/IR/AsmWriter.cpp | 21 ++++++++++++++
llvm/lib/IR/Globals.cpp | 11 +++++++
.../Assembler/globalvariable-attributes.ll | 10 +++++++
11 files changed, 140 insertions(+), 2 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ff4f769dcc0dbac..e61442f2fd9195b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -701,6 +701,9 @@ information. Attaching section information to an external declaration is an
assertion that its definition is located in the specified section. If the
definition is located in a different section, the behavior is undefined.
+LLVM allows an explicit code model to be specified for globals. If the
+target supports it, it will emit globals in the code model specified.
+
By default, global initializers are optimized by assuming that global
variables defined within the module are not modified from their
initial values before the start of the global initializer. This is
@@ -757,6 +760,7 @@ Syntax::
<global | constant> <Type> [<InitializerConstant>]
[, section "name"] [, partition "name"]
[, comdat [($name)]] [, align <Alignment>]
+ [, code_model "model"]
[, no_sanitize_address] [, no_sanitize_hwaddress]
[, sanitize_address_dyninit] [, sanitize_memtag]
(, !name !N)*
@@ -774,6 +778,13 @@ The following example just declares a global variable
@G = external global i32
+The following example defines a global variable with the
+``large`` code model:
+
+.. code-block:: llvm
+
+ @G = internal global i32 0, code_model "large"
+
The following example defines a thread-local global with the
``initialexec`` TLS model:
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index eca908a24aac7b2..9e5968c5917f73f 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -290,6 +290,7 @@ namespace llvm {
bool parseOptionalCallingConv(unsigned &CC);
bool parseOptionalAlignment(MaybeAlign &Alignment,
bool AllowParens = false);
+ bool parseOptionalCodeModel(CodeModel::Model &model);
bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
bool parseOptionalUWTableKind(UWTableKind &Kind);
bool parseAllocKind(AllocFnKind &Kind);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index c9dcd29b31955dc..56a8db2d68df238 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -115,6 +115,7 @@ enum Kind {
kw_addrspace,
kw_section,
kw_partition,
+ kw_code_model,
kw_alias,
kw_ifunc,
kw_module,
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 889bd3a28e12b37..592b7e0cb33ab54 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -18,6 +18,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
@@ -51,6 +52,7 @@ class GlobalObject : public GlobalValue {
Comdat *ObjComdat = nullptr;
enum {
LastAlignmentBit = 5,
+ LastCodeModelBit = 8,
HasSectionHashEntryBit,
GlobalObjectBits,
@@ -61,6 +63,9 @@ class GlobalObject : public GlobalValue {
private:
static const unsigned AlignmentBits = LastAlignmentBit + 1;
static const unsigned AlignmentMask = (1 << AlignmentBits) - 1;
+ static const unsigned CodeModelBits = LastCodeModelBit - LastAlignmentBit;
+ static const unsigned CodeModelMask = (1 << CodeModelBits) - 1;
+ static const unsigned CodeModelShift = AlignmentBits;
static const unsigned GlobalObjectMask = (1 << GlobalObjectBits) - 1;
public:
@@ -124,6 +129,28 @@ class GlobalObject : public GlobalValue {
/// appropriate default object file section.
void setSection(StringRef S);
+ /// Get the custom code model raw value of this global.
+ ///
+ unsigned getCodeModelRaw() const {
+ unsigned Data = getGlobalValueSubClassData();
+ return (Data >> CodeModelShift) & CodeModelMask;
+ }
+
+ /// Get the custom code model of this global if it has one.
+ ///
+ /// If this global does not have a custom code model, the empty instance
+ /// will be returned.
+ std::optional<CodeModel::Model> getCodeModel() const {
+ unsigned CodeModelData = getCodeModelRaw();
+ if (CodeModelData > 0)
+ return static_cast<CodeModel::Model>(CodeModelData - 1);
+ return {};
+ }
+
+ /// Change the code model for this global.
+ ///
+ void setCodeModel(CodeModel::Model CM);
+
bool hasComdat() const { return getComdat() != nullptr; }
const Comdat *getComdat() const { return ObjComdat; }
Comdat *getComdat() { return ObjComdat; }
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index da9e9f4a3c9833b..3b96f0e4fe866bd 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(addrspace);
KEYWORD(section);
KEYWORD(partition);
+ KEYWORD(code_model);
KEYWORD(alias);
KEYWORD(ifunc);
KEYWORD(module);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 9940bfb15d1979e..784f2e71c726a67 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1286,6 +1286,11 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
return true;
if (Alignment)
GV->setAlignment(*Alignment);
+ } else if (Lex.getKind() == lltok::kw_code_model) {
+ CodeModel::Model CodeModel;
+ if (parseOptionalCodeModel(CodeModel))
+ return true;
+ GV->setCodeModel(CodeModel);
} else if (Lex.getKind() == lltok::MetadataVar) {
if (parseGlobalObjectMetadataAttachment(*GV))
return true;
@@ -2166,6 +2171,30 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
return false;
}
+/// parseOptionalCodeModel
+/// ::= /* empty */
+/// ::= 'code_model' "large"
+bool LLParser::parseOptionalCodeModel(CodeModel::Model &model) {
+ Lex.Lex();
+ auto StrVal = Lex.getStrVal();
+ auto ErrMsg = "expected global code model string";
+ if (StrVal == "tiny")
+ model = CodeModel::Tiny;
+ else if (StrVal == "small")
+ model = CodeModel::Small;
+ else if (StrVal == "kernel")
+ model = CodeModel::Kernel;
+ else if (StrVal == "medium")
+ model = CodeModel::Medium;
+ else if (StrVal == "large")
+ model = CodeModel::Large;
+ else
+ return tokError(ErrMsg);
+ if (parseToken(lltok::StringConstant, ErrMsg))
+ return true;
+ return false;
+}
+
/// parseOptionalDerefAttrBytes
/// ::= /* empty */
/// ::= AttrKind '(' 4 ')'
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 76431e883b8d96d..9da27e00a8c0595 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1144,6 +1144,23 @@ static bool getDecodedDSOLocal(unsigned Val) {
}
}
+static std::optional<CodeModel::Model> getDecodedCodeModel(unsigned Val) {
+ switch (Val) {
+ case 1:
+ return CodeModel::Tiny;
+ case 2:
+ return CodeModel::Small;
+ case 3:
+ return CodeModel::Kernel;
+ case 4:
+ return CodeModel::Medium;
+ case 5:
+ return CodeModel::Large;
+ }
+
+ return {};
+}
+
static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) {
switch (Val) {
case 0: return GlobalVariable::NotThreadLocal;
@@ -3809,6 +3826,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
// dllstorageclass, comdat, attributes, preemption specifier,
// partition strtab offset, partition strtab size] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
+ // v3: [v2, code_model]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
@@ -3917,6 +3935,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
NewGV->setSanitizerMetadata(Meta);
}
+ if (Record.size() > 17 && Record[17]) {
+ if (auto CM = getDecodedCodeModel(Record[17]))
+ NewGV->setCodeModel(*CM);
+ else
+ return error("Invalid global variable code model");
+ }
+
return Error::success();
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index d16b5c7781c2413..be6779a05aa3122 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1404,7 +1404,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
- // comdat, attributes, DSO_Local, GlobalSanitizer]
+ // comdat, attributes, DSO_Local, GlobalSanitizer, code_model]
Vals.push_back(addToStrtab(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
@@ -1421,7 +1421,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
GV.isExternallyInitialized() ||
GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() ||
- GV.hasPartition() || GV.hasSanitizerMetadata()) {
+ GV.hasPartition() || GV.hasSanitizerMetadata() || GV.getCodeModel()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1439,6 +1439,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata(
GV.getSanitizerMetadata())
: 0));
+ Vals.push_back(GV.getCodeModelRaw());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 6d66b34423949fb..5c64827e49a5c96 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3647,6 +3647,27 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printEscapedString(GV->getPartition(), Out);
Out << '"';
}
+ if (auto CM = GV->getCodeModel()) {
+ Out << ", code_model \"";
+ switch (*CM) {
+ case CodeModel::Tiny:
+ Out << "tiny";
+ break;
+ case CodeModel::Small:
+ Out << "small";
+ break;
+ case CodeModel::Kernel:
+ Out << "kernel";
+ break;
+ case CodeModel::Medium:
+ Out << "medium";
+ break;
+ case CodeModel::Large:
+ Out << "large";
+ break;
+ }
+ Out << '"';
+ }
using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata;
if (GV->hasSanitizerMetadata()) {
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 7bd4503a689e4ae..fb9d56598298791 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -139,6 +139,8 @@ void GlobalObject::copyAttributesFrom(const GlobalObject *Src) {
GlobalValue::copyAttributesFrom(Src);
setAlignment(Src->getAlign());
setSection(Src->getSection());
+ if (auto CM = Src->getCodeModel())
+ setCodeModel(*CM);
}
std::string GlobalValue::getGlobalIdentifier(StringRef Name,
@@ -263,6 +265,15 @@ void GlobalObject::setSection(StringRef S) {
setGlobalObjectFlag(HasSectionHashEntryBit, !S.empty());
}
+void GlobalObject::setCodeModel(CodeModel::Model CM) {
+ unsigned CodeModelData = static_cast<unsigned>(CM) + 1;
+ unsigned OldData = getGlobalValueSubClassData();
+ unsigned NewData = (OldData & ~(CodeModelMask << CodeModelShift)) |
+ (CodeModelData << CodeModelShift);
+ setGlobalValueSubClassData(NewData);
+ assert(getCodeModel() == CM && "Code model representation error!");
+}
+
bool GlobalValue::isNobuiltinFnDef() const {
const Function *F = dyn_cast<Function>(this);
if (!F || F->empty())
diff --git a/llvm/test/Assembler/globalvariable-attributes.ll b/llvm/test/Assembler/globalvariable-attributes.ll
index 544f9bdb270e99a..4882b447973c08d 100644
--- a/llvm/test/Assembler/globalvariable-attributes.ll
+++ b/llvm/test/Assembler/globalvariable-attributes.ll
@@ -9,6 +9,11 @@
@g7 = global i32 2, sanitize_address_dyninit, align 4
@g8 = global i32 2, sanitize_memtag, align 4
@g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4
+@g10 = global i32 2, code_model "tiny"
+@g11 = global i32 2, code_model "small"
+@g12 = global i32 2, code_model "kernel"
+@g13 = global i32 2, code_model "medium"
+@g14 = global i32 2, code_model "large"
attributes #0 = { "string" = "value" nobuiltin norecurse }
@@ -21,6 +26,11 @@ attributes #0 = { "string" = "value" nobuiltin norecurse }
; CHECK: @g7 = global i32 2, sanitize_address_dyninit, align 4
; CHECK: @g8 = global i32 2, sanitize_memtag, align 4
; CHECK: @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4
+; CHECK: @g10 = global i32 2, code_model "tiny"
+; CHECK: @g11 = global i32 2, code_model "small"
+; CHECK: @g12 = global i32 2, code_model "kernel"
+; CHECK: @g13 = global i32 2, code_model "medium"
+; CHECK: @g14 = global i32 2, code_model "large"
; CHECK: attributes #0 = { "key"="value" "key2"="value2" }
; CHECK: attributes #1 = { "key3"="value3" }
From 1781c2170457179de6473e70d5f23a57a38f9dc5 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Fri, 1 Dec 2023 13:20:23 +0800
Subject: [PATCH 2/2] Update docs
---
llvm/docs/LangRef.rst | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e61442f2fd9195b..dabb9858551e7c6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -702,7 +702,11 @@ assertion that its definition is located in the specified section. If the
definition is located in a different section, the behavior is undefined.
LLVM allows an explicit code model to be specified for globals. If the
-target supports it, it will emit globals in the code model specified.
+target supports it, it will emit globals in the code model specified,
+overriding the code model used to compile the translation unit.
+The allowed values are "tiny", "small", "kernel", "medium", "large".
+This may be extended in the future to specify global data layout that
+doesn't cleanly fit into a specific code model.
By default, global initializers are optimized by assuming that global
variables defined within the module are not modified from their
More information about the llvm-commits
mailing list