[llvm] [LLVM][IR] Add location tracking to LLVM IR parser (PR #155797)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 04:14:54 PDT 2025
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,Bertik23
<39457484+Bertik23 at users.noreply.github.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>,
Albert Havliček <ahavlicek at azul.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/155797 at github.com>
https://github.com/Bertik23 updated https://github.com/llvm/llvm-project/pull/155797
>From ecf591c76631957bea7f8d62191d4b99fe8fc4c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Tue, 29 Jul 2025 14:41:18 +0000
Subject: [PATCH 01/31] Update CurLineNum and CurColNum in sync with movement
in text
---
llvm/include/llvm/AsmParser/LLLexer.h | 2 +
llvm/lib/AsmParser/LLLexer.cpp | 97 +++++++++++++++++----------
2 files changed, 63 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 501a7aefccd7f..5f6e32a4bf5e1 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -94,6 +94,8 @@ namespace llvm {
lltok::Kind LexToken();
int getNextChar();
+ const char *skipNChars(unsigned N);
+ void advancePositionTo(const char *Ptr);
void SkipLineComment();
bool SkipCComment();
lltok::Kind ReadString(lltok::Kind kind);
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 520c6a00a9c07..7cefd4f6b4935 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -190,6 +190,23 @@ int LLLexer::getNextChar() {
}
}
+const char *LLLexer::skipNChars(unsigned N) {
+ while (N--)
+ getNextChar();
+ return CurPtr;
+}
+
+void LLLexer::advancePositionTo(const char *Ptr) {
+ while (CurPtr != Ptr) {
+ // FIXME: Assumes that if moving back, we stay in that line
+ if (CurPtr > Ptr) {
+ --CurPtr;
+ --CurColNum;
+ } else
+ getNextChar();
+ }
+}
+
lltok::Kind LLLexer::LexToken() {
while (true) {
TokStart = CurPtr;
@@ -216,12 +233,12 @@ lltok::Kind LLLexer::LexToken() {
case '"': return LexQuote();
case '.':
if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
+ advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr-1);
return lltok::LabelStr;
}
if (CurPtr[0] == '.' && CurPtr[1] == '.') {
- CurPtr += 2;
+ skipNChars(2);
return lltok::dotdotdot;
}
return lltok::Error;
@@ -299,14 +316,14 @@ lltok::Kind LLLexer::LexAt() {
lltok::Kind LLLexer::LexDollar() {
if (const char *Ptr = isLabelTail(TokStart)) {
- CurPtr = Ptr;
+ advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr - 1);
return lltok::LabelStr;
}
// Handle DollarStringConstant: $\"[^\"]*\"
if (CurPtr[0] == '"') {
- ++CurPtr;
+ getNextChar();
while (true) {
int CurChar = getNextChar();
@@ -358,11 +375,11 @@ bool LLLexer::ReadVarName() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_') {
- ++CurPtr;
+ getNextChar();
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_')
- ++CurPtr;
+ getNextChar();
StrVal.assign(NameStart, CurPtr);
return true;
@@ -376,7 +393,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
return lltok::Error;
- for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+ getNextChar())
/*empty*/;
uint64_t Val = atoull(TokStart + 1, CurPtr);
@@ -389,7 +407,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
// Handle StringConstant: \"[^\"]*\"
if (CurPtr[0] == '"') {
- ++CurPtr;
+ getNextChar();
while (true) {
int CurChar = getNextChar();
@@ -435,7 +453,7 @@ lltok::Kind LLLexer::LexQuote() {
return kind;
if (CurPtr[0] == ':') {
- ++CurPtr;
+ getNextChar();
if (StringRef(StrVal).contains(0)) {
LexError("NUL character is not allowed in names");
kind = lltok::Error;
@@ -455,11 +473,11 @@ lltok::Kind LLLexer::LexExclaim() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
- ++CurPtr;
+ getNextChar();
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
- ++CurPtr;
+ getNextChar();
StrVal.assign(TokStart+1, CurPtr); // Skip !
UnEscapeLexed(StrVal);
@@ -495,7 +513,7 @@ lltok::Kind LLLexer::LexIdentifier() {
const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
const char *KeywordEnd = nullptr;
- for (; isLabelChar(*CurPtr); ++CurPtr) {
+ for (; isLabelChar(*CurPtr); getNextChar()) {
// If we decide this is an integer, remember the end of the sequence.
if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
IntEnd = CurPtr;
@@ -507,7 +525,8 @@ lltok::Kind LLLexer::LexIdentifier() {
// If we stopped due to a colon, unless we were directed to ignore it,
// this really is a label.
if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
- StrVal.assign(StartChar-1, CurPtr++);
+ StrVal.assign(StartChar - 1, CurPtr);
+ getNextChar();
return lltok::LabelStr;
}
@@ -515,7 +534,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// return it.
if (!IntEnd) IntEnd = CurPtr;
if (IntEnd != StartChar) {
- CurPtr = IntEnd;
+ advancePositionTo(IntEnd);
uint64_t NumBits = atoull(StartChar, CurPtr);
if (NumBits < IntegerType::MIN_INT_BITS ||
NumBits > IntegerType::MAX_INT_BITS) {
@@ -528,7 +547,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// Otherwise, this was a letter sequence. See which keyword this is.
if (!KeywordEnd) KeywordEnd = CurPtr;
- CurPtr = KeywordEnd;
+ advancePositionTo(KeywordEnd);
--StartChar;
StringRef Keyword(StartChar, CurPtr - StartChar);
@@ -1042,7 +1061,7 @@ lltok::Kind LLLexer::LexIdentifier() {
StringRef HexStr(TokStart + 3, len);
if (!all_of(HexStr, isxdigit)) {
// Bad token, return it as an error.
- CurPtr = TokStart+3;
+ advancePositionTo(TokStart + 3);
return lltok::Error;
}
APInt Tmp(bits, HexStr, 16);
@@ -1055,12 +1074,12 @@ lltok::Kind LLLexer::LexIdentifier() {
// If this is "cc1234", return this as just "cc".
if (TokStart[0] == 'c' && TokStart[1] == 'c') {
- CurPtr = TokStart+2;
+ advancePositionTo(TokStart + 2);
return lltok::kw_cc;
}
// Finally, if this isn't known, return an error.
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
@@ -1073,24 +1092,25 @@ lltok::Kind LLLexer::LexIdentifier() {
/// HexHalfConstant 0xH[0-9A-Fa-f]+
/// HexBFloatConstant 0xR[0-9A-Fa-f]+
lltok::Kind LLLexer::Lex0x() {
- CurPtr = TokStart + 2;
+ advancePositionTo(TokStart + 2);
char Kind;
if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
CurPtr[0] == 'R') {
- Kind = *CurPtr++;
+ Kind = *CurPtr;
+ getNextChar();
} else {
Kind = 'J';
}
if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Bad token, return it as an error.
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
- ++CurPtr;
+ getNextChar();
if (Kind == 'J') {
// HexFPConstant - Floating point constant represented in IEEE format as a
@@ -1147,7 +1167,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// Okay, this is not a number after the -, it's probably a label.
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- CurPtr = End;
+ advancePositionTo(End);
return lltok::LabelStr;
}
@@ -1157,13 +1177,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// At this point, it is either a label, int or fp constant.
// Skip digits, we have at least one.
- for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (; isdigit(static_cast<unsigned char>(CurPtr[0])); getNextChar())
/*empty*/;
// Check if this is a fully-numeric label:
if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
uint64_t Val = atoull(TokStart, CurPtr);
- ++CurPtr; // Skip the colon.
+ getNextChar(); // Skip the colon.
if ((unsigned)Val != Val)
LexError("invalid value number (too large)");
UIntVal = unsigned(Val);
@@ -1174,7 +1194,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- CurPtr = End;
+ advancePositionTo(End);
return lltok::LabelStr;
}
}
@@ -1188,17 +1208,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
return lltok::APSInt;
}
- ++CurPtr;
+ getNextChar();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- CurPtr += 2;
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ skipNChars(2);
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
}
}
@@ -1216,26 +1238,29 @@ lltok::Kind LLLexer::LexPositive() {
return lltok::Error;
// Skip digits.
- for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+ getNextChar())
/*empty*/;
// At this point, we need a '.'.
if (CurPtr[0] != '.') {
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
- ++CurPtr;
+ getNextChar();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- CurPtr += 2;
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ skipNChars(2);
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
}
}
>From 06926e9fa28e10abbec7803c0cd8c196ea09daf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Mon, 4 Aug 2025 09:36:51 +0000
Subject: [PATCH 02/31] Remove remains from cherry pick from LSP branch
---
llvm/lib/AsmParser/LLLexer.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 7cefd4f6b4935..db4079975ad40 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -198,10 +198,8 @@ const char *LLLexer::skipNChars(unsigned N) {
void LLLexer::advancePositionTo(const char *Ptr) {
while (CurPtr != Ptr) {
- // FIXME: Assumes that if moving back, we stay in that line
if (CurPtr > Ptr) {
--CurPtr;
- --CurColNum;
} else
getNextChar();
}
>From 1fdf13c326ba2f7889b27de5789c0339190fa0f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Mon, 4 Aug 2025 09:49:32 +0000
Subject: [PATCH 03/31] Make isLabelTail more safe and rename it to better show
what it does
---
llvm/include/llvm/AsmParser/LLLexer.h | 3 ++
llvm/lib/AsmParser/LLLexer.cpp | 43 +++++++++++----------------
2 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 5f6e32a4bf5e1..d0d6f72c197da 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -93,6 +93,9 @@ namespace llvm {
private:
lltok::Kind LexToken();
+ // Return closes pointer after `Ptr` that is an end of a label.
+ // Returns nullptr if `Ptr` doesn't point into a label.
+ const char *getLabelTail(const char *Ptr);
int getNextChar();
const char *skipNChars(unsigned N);
void advancePositionTo(const char *Ptr);
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index db4079975ad40..bbd6b690a97c0 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -155,15 +155,6 @@ static bool isLabelChar(char C) {
C == '.' || C == '_';
}
-/// isLabelTail - Return true if this pointer points to a valid end of a label.
-static const char *isLabelTail(const char *CurPtr) {
- while (true) {
- if (CurPtr[0] == ':') return CurPtr+1;
- if (!isLabelChar(CurPtr[0])) return nullptr;
- ++CurPtr;
- }
-}
-
//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//
@@ -174,20 +165,22 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
CurPtr = CurBuf.begin();
}
+/// getLabelTail - Return true if this pointer points to a valid end of a label.
+const char *LLLexer::getLabelTail(const char *Ptr) {
+ while (Ptr != CurBuf.end()) {
+ if (Ptr[0] == ':')
+ return Ptr + 1;
+ if (!isLabelChar(Ptr[0]))
+ return nullptr;
+ ++Ptr;
+ }
+ return nullptr;
+}
+
int LLLexer::getNextChar() {
- char CurChar = *CurPtr++;
- switch (CurChar) {
- default: return (unsigned char)CurChar;
- case 0:
- // A nul character in the stream is either the end of the current buffer or
- // a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf.end())
- return 0; // Just whitespace.
-
- // Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
+ if (CurPtr == CurBuf.end())
return EOF;
- }
+ return *CurPtr++;
}
const char *LLLexer::skipNChars(unsigned N) {
@@ -230,7 +223,7 @@ lltok::Kind LLLexer::LexToken() {
case '%': return LexPercent();
case '"': return LexQuote();
case '.':
- if (const char *Ptr = isLabelTail(CurPtr)) {
+ if (const char *Ptr = getLabelTail(CurPtr)) {
advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr-1);
return lltok::LabelStr;
@@ -313,7 +306,7 @@ lltok::Kind LLLexer::LexAt() {
}
lltok::Kind LLLexer::LexDollar() {
- if (const char *Ptr = isLabelTail(TokStart)) {
+ if (const char *Ptr = getLabelTail(TokStart)) {
advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr - 1);
return lltok::LabelStr;
@@ -1163,7 +1156,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
!isdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Okay, this is not a number after the -, it's probably a label.
- if (const char *End = isLabelTail(CurPtr)) {
+ if (const char *End = getLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
advancePositionTo(End);
return lltok::LabelStr;
@@ -1190,7 +1183,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// Check to see if this really is a string label, e.g. "-1:".
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
- if (const char *End = isLabelTail(CurPtr)) {
+ if (const char *End = getLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
advancePositionTo(End);
return lltok::LabelStr;
>From 2772cd8f679124daed8bae737451dcf54b637694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Mon, 4 Aug 2025 09:51:38 +0000
Subject: [PATCH 04/31] Remove dangling comment
---
llvm/lib/AsmParser/LLLexer.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index bbd6b690a97c0..578ac851e38c5 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -165,7 +165,6 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
CurPtr = CurBuf.begin();
}
-/// getLabelTail - Return true if this pointer points to a valid end of a label.
const char *LLLexer::getLabelTail(const char *Ptr) {
while (Ptr != CurBuf.end()) {
if (Ptr[0] == ':')
>From b05d11ac2ee3f8e14020a0fa4d2e7c602bb3d77b Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Tue, 12 Aug 2025 12:23:23 +0200
Subject: [PATCH 05/31] Fix typo
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/LLLexer.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index d0d6f72c197da..beb88a8c73305 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -93,7 +93,7 @@ namespace llvm {
private:
lltok::Kind LexToken();
- // Return closes pointer after `Ptr` that is an end of a label.
+ // Return closest pointer after `Ptr` that is an end of a label.
// Returns nullptr if `Ptr` doesn't point into a label.
const char *getLabelTail(const char *Ptr);
int getNextChar();
>From 458599b36cfdc15d7d79c7c7ff0fe770c4d0685d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Thu, 28 Aug 2025 07:57:04 +0000
Subject: [PATCH 06/31] Add location tracking to IR parser
---
.../include/llvm/AsmParser/AsmParserContext.h | 53 +++++++
llvm/include/llvm/AsmParser/LLLexer.h | 35 ++++-
llvm/include/llvm/AsmParser/LLParser.h | 9 +-
llvm/include/llvm/AsmParser/Parser.h | 16 ++-
llvm/include/llvm/IR/Value.h | 32 +++++
llvm/include/llvm/IRReader/IRReader.h | 17 +--
llvm/lib/AsmParser/AsmParserContext.cpp | 91 ++++++++++++
llvm/lib/AsmParser/CMakeLists.txt | 1 +
llvm/lib/AsmParser/LLLexer.cpp | 131 +++++++++++++-----
llvm/lib/AsmParser/LLParser.cpp | 27 +++-
llvm/lib/AsmParser/Parser.cpp | 31 +++--
llvm/lib/IRReader/IRReader.cpp | 13 +-
llvm/unittests/AsmParser/AsmParserTest.cpp | 60 ++++++++
13 files changed, 440 insertions(+), 76 deletions(-)
create mode 100644 llvm/include/llvm/AsmParser/AsmParserContext.h
create mode 100644 llvm/lib/AsmParser/AsmParserContext.cpp
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
new file mode 100644
index 0000000000000..bc4d93ef727ef
--- /dev/null
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -0,0 +1,53 @@
+//===-- AsmParserContext.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H
+#define LLVM_ASMPARSER_ASMPARSER_STATE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Value.h"
+#include <optional>
+
+namespace llvm {
+
+/// Registry of file location information for LLVM IR constructs
+///
+/// This class provides access to the file location information
+/// for various LLVM IR constructs. Currently, it supports Function,
+/// BasicBlock and Instruction locations.
+///
+/// When available, it can answer queries about what is at a given
+/// file location, as well as where in a file a given IR construct
+/// is.
+///
+/// This information is optionally emitted by the LLParser while
+/// it reads LLVM textual IR.
+class AsmParserContext {
+public:
+ std::optional<FileLocRange> getFunctionLocation(const Function *) const;
+ std::optional<FileLocRange> getBlockLocation(const BasicBlock *) const;
+ std::optional<FileLocRange> getInstructionLocation(const Instruction *) const;
+ std::optional<Function *> getFunctionAtLocation(const FileLocRange &) const;
+ std::optional<Function *> getFunctionAtLocation(const FileLoc &) const;
+ std::optional<BasicBlock *> getBlockAtLocation(const FileLocRange &) const;
+ std::optional<BasicBlock *> getBlockAtLocation(const FileLoc &) const;
+ std::optional<Instruction *>
+ getInstructionAtLocation(const FileLocRange &) const;
+ std::optional<Instruction *> getInstructionAtLocation(const FileLoc &) const;
+ bool addFunctionLocation(Function *, const FileLocRange &);
+ bool addBlockLocation(BasicBlock *, const FileLocRange &);
+ bool addInstructionLocation(Instruction *, const FileLocRange &);
+
+private:
+ DenseMap<Function *, FileLocRange> Functions;
+ DenseMap<BasicBlock *, FileLocRange> Blocks;
+ DenseMap<Instruction *, FileLocRange> Instructions;
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 501a7aefccd7f..3d0e28ea9f5bc 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -29,6 +29,20 @@ namespace llvm {
const char *CurPtr;
StringRef CurBuf;
+ // The line number at `CurPtr-1`, zero-indexed
+ unsigned CurLineNum = 0;
+ // The column number at `CurPtr-1`, zero-indexed
+ unsigned CurColNum = -1;
+ // The line number of the start of the current token, zero-indexed
+ unsigned CurTokLineNum = 0;
+ // The column number of the start of the current token, zero-indexed
+ unsigned CurTokColNum = 0;
+ // The line number of the end of the current token, zero-indexed
+ unsigned PrevTokEndLineNum = -1;
+ // The column number of the end (exclusive) of the current token,
+ // zero-indexed
+ unsigned PrevTokEndColNum = -1;
+
enum class ErrorPriority {
None, // No error message present.
Parser, // Errors issued by parser.
@@ -62,9 +76,7 @@ namespace llvm {
explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
- lltok::Kind Lex() {
- return CurKind = LexToken();
- }
+ lltok::Kind Lex() { return CurKind = LexToken(); }
typedef SMLoc LocTy;
LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
@@ -79,6 +91,21 @@ namespace llvm {
IgnoreColonInIdentifiers = val;
}
+ // Get the current line number, zero-indexed
+ unsigned getLineNum() { return CurLineNum; }
+ // Get the current column number, zero-indexed
+ unsigned getColNum() { return CurColNum; }
+ // Get the line number of the start of the current token, zero-indexed
+ unsigned getTokLineNum() { return CurTokLineNum; }
+ // Get the column number of the start of the current token, zero-indexed
+ unsigned getTokColNum() { return CurTokColNum; }
+ // Get the line number of the end of the previous token, zero-indexed,
+ // exclusive
+ unsigned getPrevTokEndLineNum() { return PrevTokEndLineNum; }
+ // Get the column number of the end of the previous token, zero-indexed,
+ // exclusive
+ unsigned getPrevTokEndColNum() { return PrevTokEndColNum; }
+
// This returns true as a convenience for the parser functions that return
// true on error.
bool ParseError(LocTy ErrorLoc, const Twine &Msg) {
@@ -94,6 +121,8 @@ namespace llvm {
lltok::Kind LexToken();
int getNextChar();
+ const char *skipNChars(unsigned N);
+ void advancePositionTo(const char *Ptr);
void SkipLineComment();
bool SkipCComment();
lltok::Kind ReadString(lltok::Kind kind);
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index c01de4a289a69..02460e5e52203 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -13,6 +13,7 @@
#ifndef LLVM_ASMPARSER_LLPARSER_H
#define LLVM_ASMPARSER_LLPARSER_H
+#include "AsmParserContext.h"
#include "LLLexer.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/AsmParser/NumberedValues.h"
@@ -177,6 +178,9 @@ namespace llvm {
// Map of module ID to path.
std::map<unsigned, StringRef> ModuleIdMap;
+ /// Keeps track of source locations for Values, BasicBlocks, and Functions
+ AsmParserContext *ParserContext;
+
/// Only the llvm-as tool may set this to false to bypass
/// UpgradeDebuginfo so it can generate broken bitcode.
bool UpgradeDebugInfo;
@@ -189,10 +193,11 @@ namespace llvm {
public:
LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
ModuleSummaryIndex *Index, LLVMContext &Context,
- SlotMapping *Slots = nullptr)
+ SlotMapping *Slots = nullptr,
+ AsmParserContext *ParserContext = nullptr)
: Context(Context), OPLex(F, SM, Err, Context),
Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots),
- BlockAddressPFS(nullptr) {}
+ BlockAddressPFS(nullptr), ParserContext(ParserContext) {}
bool Run(
bool UpgradeDebugInfo,
DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h
index c900b79665404..22b0881d92b53 100644
--- a/llvm/include/llvm/AsmParser/Parser.h
+++ b/llvm/include/llvm/AsmParser/Parser.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/AsmParser/AsmParserContext.h"
#include "llvm/Support/Compiler.h"
#include <memory>
#include <optional>
@@ -62,7 +63,8 @@ parseAssemblyFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
/// parsing.
LLVM_ABI std::unique_ptr<Module>
parseAssemblyString(StringRef AsmString, SMDiagnostic &Err,
- LLVMContext &Context, SlotMapping *Slots = nullptr);
+ LLVMContext &Context, SlotMapping *Slots = nullptr,
+ AsmParserContext *ParserContext = nullptr);
/// Holds the Module and ModuleSummaryIndex returned by the interfaces
/// that parse both.
@@ -128,9 +130,9 @@ parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
LLVM_ABI std::unique_ptr<Module> parseAssembly(
MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
SlotMapping *Slots = nullptr,
- DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
- return std::nullopt;
- });
+ DataLayoutCallbackTy DataLayoutCallback =
+ [](StringRef, StringRef) { return std::nullopt; },
+ AsmParserContext *ParserContext = nullptr);
/// Parse LLVM Assembly including the summary index from a MemoryBuffer.
///
@@ -169,9 +171,9 @@ parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err);
LLVM_ABI bool parseAssemblyInto(
MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err,
SlotMapping *Slots = nullptr,
- DataLayoutCallbackTy DataLayoutCallback = [](StringRef, StringRef) {
- return std::nullopt;
- });
+ DataLayoutCallbackTy DataLayoutCallback =
+ [](StringRef, StringRef) { return std::nullopt; },
+ AsmParserContext *ParserContext = nullptr);
/// Parse a type and a constant value in the given string.
///
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 04d0391c04098..9e27797d151e2 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -55,6 +55,38 @@ class User;
using ValueName = StringMapEntry<Value *>;
+struct FileLoc {
+ unsigned Line;
+ unsigned Col;
+
+ bool operator<=(const FileLoc &RHS) const {
+ return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col);
+ }
+
+ bool operator<(const FileLoc &RHS) const {
+ return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col);
+ }
+
+ FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {}
+};
+
+struct FileLocRange {
+ FileLoc Start;
+ FileLoc End;
+
+ FileLocRange() : Start(0, 0), End(0, 0) {}
+
+ FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) {
+ assert(Start <= End);
+ }
+
+ bool contains(FileLoc L) const { return Start <= L && L <= End; }
+
+ bool contains(FileLocRange LR) const {
+ return contains(LR.Start) && contains(LR.End);
+ }
+};
+
//===----------------------------------------------------------------------===//
// Value Class
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h
index 790140f19934e..00cf12d342ae0 100644
--- a/llvm/include/llvm/IRReader/IRReader.h
+++ b/llvm/include/llvm/IRReader/IRReader.h
@@ -15,6 +15,7 @@
#define LLVM_IRREADER_IRREADER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/AsmParser/AsmParserContext.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Support/Compiler.h"
#include <memory>
@@ -50,19 +51,19 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
/// for it. Otherwise, attempt to parse it as LLVM Assembly and return
/// a Module for it.
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
-LLVM_ABI std::unique_ptr<Module> parseIR(MemoryBufferRef Buffer,
- SMDiagnostic &Err,
- LLVMContext &Context,
- ParserCallbacks Callbacks = {});
+LLVM_ABI std::unique_ptr<Module>
+parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context,
+ ParserCallbacks Callbacks = {},
+ AsmParserContext *ParserContext = nullptr);
/// If the given file holds a bitcode image, return a Module for it.
/// Otherwise, attempt to parse it as LLVM Assembly and return a Module
/// for it.
/// \param DataLayoutCallback Override datalayout in the llvm assembly.
-LLVM_ABI std::unique_ptr<Module> parseIRFile(StringRef Filename,
- SMDiagnostic &Err,
- LLVMContext &Context,
- ParserCallbacks Callbacks = {});
+LLVM_ABI std::unique_ptr<Module>
+parseIRFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
+ ParserCallbacks Callbacks = {},
+ AsmParserContext *ParserContext = nullptr);
}
#endif
diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp
new file mode 100644
index 0000000000000..f5e3d83f5d346
--- /dev/null
+++ b/llvm/lib/AsmParser/AsmParserContext.cpp
@@ -0,0 +1,91 @@
+//===-- AsmParserContext.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AsmParser/AsmParserContext.h"
+
+namespace llvm {
+
+std::optional<FileLocRange>
+AsmParserContext::getFunctionLocation(const Function *F) const {
+ if (!Functions.contains(F))
+ return std::nullopt;
+ return Functions.at(F);
+}
+
+std::optional<FileLocRange>
+AsmParserContext::getBlockLocation(const BasicBlock *BB) const {
+ if (!Blocks.contains(BB))
+ return std::nullopt;
+ return Blocks.at(BB);
+}
+
+std::optional<FileLocRange>
+AsmParserContext::getInstructionLocation(const Instruction *I) const {
+ if (!Instructions.contains(I))
+ return std::nullopt;
+ return Instructions.at(I);
+}
+
+std::optional<Function *>
+AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const {
+ for (auto &[F, Loc] : Functions) {
+ if (Loc.contains(Query))
+ return F;
+ }
+ return std::nullopt;
+}
+
+std::optional<Function *>
+AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const {
+ return getFunctionAtLocation(FileLocRange(Query, Query));
+}
+
+std::optional<BasicBlock *>
+AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const {
+ for (auto &[BB, Loc] : Blocks) {
+ if (Loc.contains(Query))
+ return BB;
+ }
+ return std::nullopt;
+}
+
+std::optional<BasicBlock *>
+AsmParserContext::getBlockAtLocation(const FileLoc &Query) const {
+ return getBlockAtLocation(FileLocRange(Query, Query));
+}
+
+std::optional<Instruction *>
+AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const {
+ for (auto &[I, Loc] : Instructions) {
+ if (Loc.contains(Query))
+ return I;
+ }
+ return std::nullopt;
+}
+
+std::optional<Instruction *>
+AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const {
+ return getInstructionAtLocation(FileLocRange(Query, Query));
+}
+
+bool AsmParserContext::addFunctionLocation(Function *F,
+ const FileLocRange &Loc) {
+ return Functions.insert({F, Loc}).second;
+}
+
+bool AsmParserContext::addBlockLocation(BasicBlock *BB,
+ const FileLocRange &Loc) {
+ return Blocks.insert({BB, Loc}).second;
+}
+
+bool AsmParserContext::addInstructionLocation(Instruction *I,
+ const FileLocRange &Loc) {
+ return Instructions.insert({I, Loc}).second;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/AsmParser/CMakeLists.txt b/llvm/lib/AsmParser/CMakeLists.txt
index 20d0c50a029ca..dcfcc06f093a7 100644
--- a/llvm/lib/AsmParser/CMakeLists.txt
+++ b/llvm/lib/AsmParser/CMakeLists.txt
@@ -1,5 +1,6 @@
# AsmParser
add_llvm_component_library(LLVMAsmParser
+ AsmParserContext.cpp
LLLexer.cpp
LLParser.cpp
Parser.cpp
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 3d5bd6155536e..a209de05c39db 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -176,6 +176,14 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
int LLLexer::getNextChar() {
char CurChar = *CurPtr++;
+ // Increment line number if this is the first character after a newline
+ // CurPtr points to the char after CurChar, so two positions before that
+ if ((CurPtr - 2) >= CurBuf.begin() && *(CurPtr - 2) == '\n') {
+ CurLineNum++;
+ CurColNum = 0;
+ } else
+ CurColNum++;
+
switch (CurChar) {
default: return (unsigned char)CurChar;
case 0:
@@ -190,11 +198,52 @@ int LLLexer::getNextChar() {
}
}
+const char *LLLexer::skipNChars(unsigned N) {
+ while (N--)
+ getNextChar();
+ return CurPtr;
+}
+
+void LLLexer::advancePositionTo(const char *Ptr) {
+ bool RecalculateColumn = false;
+ while (CurPtr != Ptr) {
+ if (CurPtr > Ptr) {
+ --CurPtr;
+ --CurColNum;
+ // Since CurPtr is one char ahead of the stored position, check if the
+ // previous char is not a newline
+ if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') {
+ --CurLineNum;
+ RecalculateColumn = true;
+ }
+ } else
+ getNextChar();
+ }
+ if (RecalculateColumn) {
+ CurColNum = 0;
+ // Count the number of chars to the previous newline or start of buffer
+ for (const char *Ptr = CurPtr; Ptr != CurBuf.begin() && *(Ptr - 1) != '\n';
+ --Ptr, ++CurColNum)
+ ;
+ }
+}
+
lltok::Kind LLLexer::LexToken() {
+ // Set token end to next location, since the end is
+ // exclusive
+ if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') {
+ PrevTokEndLineNum = CurLineNum + 1;
+ PrevTokEndColNum = 0;
+ } else {
+ PrevTokEndLineNum = CurLineNum;
+ PrevTokEndColNum = CurColNum + 1;
+ }
while (true) {
TokStart = CurPtr;
-
int CurChar = getNextChar();
+ CurTokColNum = CurColNum;
+ CurTokLineNum = CurLineNum;
+
switch (CurChar) {
default:
// Handle letters: [a-zA-Z_]
@@ -216,12 +265,12 @@ lltok::Kind LLLexer::LexToken() {
case '"': return LexQuote();
case '.':
if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
+ advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr-1);
return lltok::LabelStr;
}
if (CurPtr[0] == '.' && CurPtr[1] == '.') {
- CurPtr += 2;
+ skipNChars(2);
return lltok::dotdotdot;
}
return lltok::Error;
@@ -299,14 +348,14 @@ lltok::Kind LLLexer::LexAt() {
lltok::Kind LLLexer::LexDollar() {
if (const char *Ptr = isLabelTail(TokStart)) {
- CurPtr = Ptr;
+ advancePositionTo(Ptr);
StrVal.assign(TokStart, CurPtr - 1);
return lltok::LabelStr;
}
// Handle DollarStringConstant: $\"[^\"]*\"
if (CurPtr[0] == '"') {
- ++CurPtr;
+ getNextChar();
while (true) {
int CurChar = getNextChar();
@@ -358,11 +407,11 @@ bool LLLexer::ReadVarName() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_') {
- ++CurPtr;
+ getNextChar();
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_')
- ++CurPtr;
+ getNextChar();
StrVal.assign(NameStart, CurPtr);
return true;
@@ -376,7 +425,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
return lltok::Error;
- for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+ getNextChar())
/*empty*/;
uint64_t Val = atoull(TokStart + 1, CurPtr);
@@ -389,7 +439,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
// Handle StringConstant: \"[^\"]*\"
if (CurPtr[0] == '"') {
- ++CurPtr;
+ getNextChar();
while (true) {
int CurChar = getNextChar();
@@ -435,7 +485,7 @@ lltok::Kind LLLexer::LexQuote() {
return kind;
if (CurPtr[0] == ':') {
- ++CurPtr;
+ getNextChar();
if (StringRef(StrVal).contains(0)) {
LexError("NUL character is not allowed in names");
kind = lltok::Error;
@@ -455,11 +505,11 @@ lltok::Kind LLLexer::LexExclaim() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
- ++CurPtr;
+ getNextChar();
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
- ++CurPtr;
+ getNextChar();
StrVal.assign(TokStart+1, CurPtr); // Skip !
UnEscapeLexed(StrVal);
@@ -495,7 +545,7 @@ lltok::Kind LLLexer::LexIdentifier() {
const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
const char *KeywordEnd = nullptr;
- for (; isLabelChar(*CurPtr); ++CurPtr) {
+ for (; isLabelChar(*CurPtr); getNextChar()) {
// If we decide this is an integer, remember the end of the sequence.
if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
IntEnd = CurPtr;
@@ -507,7 +557,8 @@ lltok::Kind LLLexer::LexIdentifier() {
// If we stopped due to a colon, unless we were directed to ignore it,
// this really is a label.
if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
- StrVal.assign(StartChar-1, CurPtr++);
+ StrVal.assign(StartChar - 1, CurPtr);
+ getNextChar();
return lltok::LabelStr;
}
@@ -515,7 +566,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// return it.
if (!IntEnd) IntEnd = CurPtr;
if (IntEnd != StartChar) {
- CurPtr = IntEnd;
+ advancePositionTo(IntEnd);
uint64_t NumBits = atoull(StartChar, CurPtr);
if (NumBits < IntegerType::MIN_INT_BITS ||
NumBits > IntegerType::MAX_INT_BITS) {
@@ -528,7 +579,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// Otherwise, this was a letter sequence. See which keyword this is.
if (!KeywordEnd) KeywordEnd = CurPtr;
- CurPtr = KeywordEnd;
+ advancePositionTo(KeywordEnd);
--StartChar;
StringRef Keyword(StartChar, CurPtr - StartChar);
@@ -1043,7 +1094,7 @@ lltok::Kind LLLexer::LexIdentifier() {
StringRef HexStr(TokStart + 3, len);
if (!all_of(HexStr, isxdigit)) {
// Bad token, return it as an error.
- CurPtr = TokStart+3;
+ advancePositionTo(TokStart + 3);
return lltok::Error;
}
APInt Tmp(bits, HexStr, 16);
@@ -1056,12 +1107,12 @@ lltok::Kind LLLexer::LexIdentifier() {
// If this is "cc1234", return this as just "cc".
if (TokStart[0] == 'c' && TokStart[1] == 'c') {
- CurPtr = TokStart+2;
+ advancePositionTo(TokStart + 2);
return lltok::kw_cc;
}
// Finally, if this isn't known, return an error.
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
@@ -1074,24 +1125,25 @@ lltok::Kind LLLexer::LexIdentifier() {
/// HexHalfConstant 0xH[0-9A-Fa-f]+
/// HexBFloatConstant 0xR[0-9A-Fa-f]+
lltok::Kind LLLexer::Lex0x() {
- CurPtr = TokStart + 2;
+ advancePositionTo(TokStart + 2);
char Kind;
if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
CurPtr[0] == 'R') {
- Kind = *CurPtr++;
+ Kind = *CurPtr;
+ getNextChar();
} else {
Kind = 'J';
}
if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Bad token, return it as an error.
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
- ++CurPtr;
+ getNextChar();
if (Kind == 'J') {
// HexFPConstant - Floating point constant represented in IEEE format as a
@@ -1148,7 +1200,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// Okay, this is not a number after the -, it's probably a label.
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- CurPtr = End;
+ advancePositionTo(End);
return lltok::LabelStr;
}
@@ -1158,13 +1210,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// At this point, it is either a label, int or fp constant.
// Skip digits, we have at least one.
- for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (; isdigit(static_cast<unsigned char>(CurPtr[0])); getNextChar())
/*empty*/;
// Check if this is a fully-numeric label:
if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
uint64_t Val = atoull(TokStart, CurPtr);
- ++CurPtr; // Skip the colon.
+ getNextChar(); // Skip the colon.
if ((unsigned)Val != Val)
LexError("invalid value number (too large)");
UIntVal = unsigned(Val);
@@ -1175,7 +1227,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- CurPtr = End;
+ advancePositionTo(End);
return lltok::LabelStr;
}
}
@@ -1189,17 +1241,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
return lltok::APSInt;
}
- ++CurPtr;
+ getNextChar();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- CurPtr += 2;
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ skipNChars(2);
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
}
}
@@ -1217,26 +1271,29 @@ lltok::Kind LLLexer::LexPositive() {
return lltok::Error;
// Skip digits.
- for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+ getNextChar())
/*empty*/;
// At this point, we need a '.'.
if (CurPtr[0] != '.') {
- CurPtr = TokStart+1;
+ advancePositionTo(TokStart + 1);
return lltok::Error;
}
- ++CurPtr;
+ getNextChar();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- CurPtr += 2;
- while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+ skipNChars(2);
+ while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+ getNextChar();
}
}
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 1bc2906f63b07..03fe1097f8612 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -740,14 +740,22 @@ bool LLParser::parseDeclare() {
/// ::= 'define' FunctionHeader (!dbg !56)* '{' ...
bool LLParser::parseDefine() {
assert(Lex.getKind() == lltok::kw_define);
+ FileLoc FunctionStart(Lex.getTokLineNum(), Lex.getTokColNum());
Lex.Lex();
Function *F;
unsigned FunctionNumber = -1;
SmallVector<unsigned> UnnamedArgNums;
- return parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) ||
- parseOptionalFunctionMetadata(*F) ||
- parseFunctionBody(*F, FunctionNumber, UnnamedArgNums);
+ bool RetValue =
+ parseFunctionHeader(F, true, FunctionNumber, UnnamedArgNums) ||
+ parseOptionalFunctionMetadata(*F) ||
+ parseFunctionBody(*F, FunctionNumber, UnnamedArgNums);
+ if (ParserContext)
+ ParserContext->addFunctionLocation(
+ F, FileLocRange(FunctionStart, {Lex.getPrevTokEndLineNum(),
+ Lex.getPrevTokEndColNum()}));
+
+ return RetValue;
}
/// parseGlobalType
@@ -6951,6 +6959,8 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber,
/// parseBasicBlock
/// ::= (LabelStr|LabelID)? Instruction*
bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
+ FileLoc BBStart(Lex.getTokLineNum(), Lex.getTokColNum());
+
// If this basic block starts out with a name, remember it.
std::string Name;
int NameID = -1;
@@ -6992,6 +7002,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord);
}
+ FileLoc InstStart(Lex.getTokLineNum(), Lex.getTokColNum());
// This instruction may have three possibilities for a name: a) none
// specified, b) name specified "%foo =", c) number specified: "%4 =".
LocTy NameLoc = Lex.getLoc();
@@ -7041,8 +7052,18 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
for (DbgRecordPtr &DR : TrailingDbgRecord)
BB->insertDbgRecordBefore(DR.release(), Inst->getIterator());
TrailingDbgRecord.clear();
+ if (ParserContext) {
+ ParserContext->addInstructionLocation(
+ Inst, FileLocRange(InstStart, {Lex.getPrevTokEndLineNum(),
+ Lex.getPrevTokEndColNum()}));
+ }
} while (!Inst->isTerminator());
+ if (ParserContext)
+ ParserContext->addBlockLocation(
+ BB, FileLocRange(BBStart, {Lex.getPrevTokEndLineNum(),
+ Lex.getPrevTokEndColNum()}));
+
assert(TrailingDbgRecord.empty() &&
"All debug values should have been attached to an instruction.");
diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 07fdce981b084..c5346d0977314 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -24,33 +24,38 @@ using namespace llvm;
static bool parseAssemblyInto(MemoryBufferRef F, Module *M,
ModuleSummaryIndex *Index, SMDiagnostic &Err,
SlotMapping *Slots, bool UpgradeDebugInfo,
- DataLayoutCallbackTy DataLayoutCallback) {
+ DataLayoutCallbackTy DataLayoutCallback,
+ AsmParserContext *ParserContext = nullptr) {
SourceMgr SM;
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F);
SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
std::optional<LLVMContext> OptContext;
return LLParser(F.getBuffer(), SM, Err, M, Index,
- M ? M->getContext() : OptContext.emplace(), Slots)
+ M ? M->getContext() : OptContext.emplace(), Slots,
+ ParserContext)
.Run(UpgradeDebugInfo, DataLayoutCallback);
}
bool llvm::parseAssemblyInto(MemoryBufferRef F, Module *M,
ModuleSummaryIndex *Index, SMDiagnostic &Err,
SlotMapping *Slots,
- DataLayoutCallbackTy DataLayoutCallback) {
+ DataLayoutCallbackTy DataLayoutCallback,
+ AsmParserContext *ParserContext) {
return ::parseAssemblyInto(F, M, Index, Err, Slots,
- /*UpgradeDebugInfo*/ true, DataLayoutCallback);
+ /*UpgradeDebugInfo*/ true, DataLayoutCallback,
+ ParserContext);
}
std::unique_ptr<Module>
llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context,
- SlotMapping *Slots,
- DataLayoutCallbackTy DataLayoutCallback) {
+ SlotMapping *Slots, DataLayoutCallbackTy DataLayoutCallback,
+ AsmParserContext *ParserContext) {
std::unique_ptr<Module> M =
std::make_unique<Module>(F.getBufferIdentifier(), Context);
- if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback))
+ if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, DataLayoutCallback,
+ ParserContext))
return nullptr;
return M;
@@ -133,12 +138,14 @@ ParsedModuleAndIndex llvm::parseAssemblyFileWithIndexNoUpgradeDebugInfo(
DataLayoutCallback);
}
-std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
- SMDiagnostic &Err,
- LLVMContext &Context,
- SlotMapping *Slots) {
+std::unique_ptr<Module>
+llvm::parseAssemblyString(StringRef AsmString, SMDiagnostic &Err,
+ LLVMContext &Context, SlotMapping *Slots,
+ AsmParserContext *ParserContext) {
MemoryBufferRef F(AsmString, "<string>");
- return parseAssembly(F, Err, Context, Slots);
+ return parseAssembly(
+ F, Err, Context, Slots, [](StringRef, StringRef) { return std::nullopt; },
+ ParserContext);
}
static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F,
diff --git a/llvm/lib/IRReader/IRReader.cpp b/llvm/lib/IRReader/IRReader.cpp
index a7e7deee8aa91..c16871f081d1d 100644
--- a/llvm/lib/IRReader/IRReader.cpp
+++ b/llvm/lib/IRReader/IRReader.cpp
@@ -8,6 +8,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm-c/IRReader.h"
+#include "llvm/AsmParser/AsmParserContext.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/LLVMContext.h"
@@ -68,7 +69,8 @@ std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename,
std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
LLVMContext &Context,
- ParserCallbacks Callbacks) {
+ ParserCallbacks Callbacks,
+ llvm::AsmParserContext *ParserContext) {
NamedRegionTimer T(TimeIRParsingName, TimeIRParsingDescription,
TimeIRParsingGroupName, TimeIRParsingGroupDescription,
TimePassesIsEnabled);
@@ -88,12 +90,14 @@ std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
return parseAssembly(Buffer, Err, Context, nullptr,
Callbacks.DataLayout.value_or(
- [](StringRef, StringRef) { return std::nullopt; }));
+ [](StringRef, StringRef) { return std::nullopt; }),
+ ParserContext);
}
std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err,
LLVMContext &Context,
- ParserCallbacks Callbacks) {
+ ParserCallbacks Callbacks,
+ AsmParserContext *ParserContext) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
if (std::error_code EC = FileOrErr.getError()) {
@@ -102,7 +106,8 @@ std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err,
return nullptr;
}
- return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks);
+ return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, Callbacks,
+ ParserContext);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index ce226705068af..49f59696709f2 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -6,7 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/AsmParser/AsmParserContext.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/IR/Constants.h"
@@ -14,6 +16,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
@@ -479,4 +483,60 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) {
ASSERT_EQ(Mapping.MetadataNodes.size(), 0u);
}
+#define ASSERT_EQ_LOC(Loc1, Loc2) \
+ do { \
+ bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \
+ if (!AreLocsEqual) { \
+ dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \
+ << Loc1.Start.Col << " - " << Loc1.End.Line << ":" \
+ << Loc1.End.Col << "\n"; \
+ dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \
+ << Loc2.Start.Col << " - " << Loc2.End.Line << ":" \
+ << Loc2.End.Col << "\n"; \
+ } \
+ ASSERT_TRUE(AreLocsEqual); \
+ } while (false)
+
+TEST(AsmParserTest, ParserObjectLocations) {
+ // Expected to fail with function location starting one character later, needs
+ // a fix
+ StringRef Source = "define i32 @main() {\n"
+ "entry:\n"
+ " %a = add i32 1, 2\n"
+ " ret i32 %a\n"
+ "}\n";
+ LLVMContext Ctx;
+ SMDiagnostic Error;
+ SlotMapping Mapping;
+ AsmParserContext ParserContext;
+ auto Mod = parseAssemblyString(Source, Error, Ctx, &Mapping, &ParserContext);
+
+ auto *MainFn = Mod->getFunction("main");
+ ASSERT_TRUE(MainFn != nullptr);
+
+ auto MaybeMainLoc = ParserContext.getFunctionLocation(MainFn);
+ ASSERT_TRUE(MaybeMainLoc.has_value());
+ auto MainLoc = MaybeMainLoc.value();
+ auto ExpectedMainLoc = FileLocRange(FileLoc{0, 0}, FileLoc{4, 1});
+ ASSERT_EQ_LOC(MainLoc, ExpectedMainLoc);
+
+ auto &EntryBB = MainFn->getEntryBlock();
+ auto MaybeEntryBBLoc = ParserContext.getBlockLocation(&EntryBB);
+ ASSERT_TRUE(MaybeEntryBBLoc.has_value());
+ auto EntryBBLoc = MaybeEntryBBLoc.value();
+ auto ExpectedEntryBBLoc = FileLocRange(FileLoc{1, 0}, FileLoc{3, 14});
+ ASSERT_EQ_LOC(EntryBBLoc, ExpectedEntryBBLoc);
+
+ SmallVector<FileLocRange> InstructionLocations = {
+ FileLocRange(FileLoc{2, 4}, FileLoc{2, 21}),
+ FileLocRange(FileLoc{3, 4}, FileLoc{3, 14})};
+
+ for (const auto &[Inst, ExpectedLoc] : zip(EntryBB, InstructionLocations)) {
+ auto MaybeInstLoc = ParserContext.getInstructionLocation(&Inst);
+ ASSERT_TRUE(MaybeInstLoc.has_value());
+ auto InstLoc = MaybeInstLoc.value();
+ ASSERT_EQ_LOC(InstLoc, ExpectedLoc);
+ }
+}
+
} // end anonymous namespace
>From b0c5318d100ecb44684c5c31de1b19ab774b5549 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Thu, 28 Aug 2025 10:10:43 +0000
Subject: [PATCH 07/31] Fix clang format
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +-
llvm/lib/AsmParser/LLLexer.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index bc4d93ef727ef..78ea32ac7ca08 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -24,7 +24,7 @@ namespace llvm {
/// When available, it can answer queries about what is at a given
/// file location, as well as where in a file a given IR construct
/// is.
-///
+///
/// This information is optionally emitted by the LLParser while
/// it reads LLVM textual IR.
class AsmParserContext {
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index cd9d5b37d86e5..be5b2b9bce0ca 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -180,7 +180,7 @@ int LLLexer::getNextChar() {
if (CurPtr == CurBuf.end())
return EOF;
// Increment line number if this is the first character after a newline
- if (CurPtr > CurBuf.begin() && *(CurPtr-1) == '\n'){
+ if (CurPtr > CurBuf.begin() && *(CurPtr - 1) == '\n') {
CurLineNum++;
CurColNum = 0;
} else
>From 416514e8eb5de149dffe5bd49035b7a91904d70c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Fri, 29 Aug 2025 07:49:27 +0000
Subject: [PATCH 08/31] Move private members to top of class definition
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 78ea32ac7ca08..39c0e0d9df4de 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -28,6 +28,10 @@ namespace llvm {
/// This information is optionally emitted by the LLParser while
/// it reads LLVM textual IR.
class AsmParserContext {
+ DenseMap<Function *, FileLocRange> Functions;
+ DenseMap<BasicBlock *, FileLocRange> Blocks;
+ DenseMap<Instruction *, FileLocRange> Instructions;
+
public:
std::optional<FileLocRange> getFunctionLocation(const Function *) const;
std::optional<FileLocRange> getBlockLocation(const BasicBlock *) const;
@@ -42,11 +46,6 @@ class AsmParserContext {
bool addFunctionLocation(Function *, const FileLocRange &);
bool addBlockLocation(BasicBlock *, const FileLocRange &);
bool addInstructionLocation(Instruction *, const FileLocRange &);
-
-private:
- DenseMap<Function *, FileLocRange> Functions;
- DenseMap<BasicBlock *, FileLocRange> Blocks;
- DenseMap<Instruction *, FileLocRange> Instructions;
};
} // namespace llvm
>From 35ca1a501ac7a7c969df05d8e40ea3530bad75b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Tue, 2 Sep 2025 12:12:51 +0000
Subject: [PATCH 09/31] Use SourceMgr to resolve Line:Column position
---
llvm/include/llvm/AsmParser/LLLexer.h | 8 -----
llvm/lib/AsmParser/LLLexer.cpp | 45 +++++++--------------------
2 files changed, 12 insertions(+), 41 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 5008ef029f3ff..bacf124d07d20 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -29,10 +29,6 @@ namespace llvm {
const char *CurPtr;
StringRef CurBuf;
- // The line number at `CurPtr-1`, zero-indexed
- unsigned CurLineNum = 0;
- // The column number at `CurPtr-1`, zero-indexed
- unsigned CurColNum = -1;
// The line number of the start of the current token, zero-indexed
unsigned CurTokLineNum = 0;
// The column number of the start of the current token, zero-indexed
@@ -91,10 +87,6 @@ namespace llvm {
IgnoreColonInIdentifiers = val;
}
- // Get the current line number, zero-indexed
- unsigned getLineNum() { return CurLineNum; }
- // Get the current column number, zero-indexed
- unsigned getColNum() { return CurColNum; }
// Get the line number of the start of the current token, zero-indexed
unsigned getTokLineNum() { return CurTokLineNum; }
// Get the column number of the start of the current token, zero-indexed
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index be5b2b9bce0ca..0041cc5fd95fa 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -180,11 +180,6 @@ int LLLexer::getNextChar() {
if (CurPtr == CurBuf.end())
return EOF;
// Increment line number if this is the first character after a newline
- if (CurPtr > CurBuf.begin() && *(CurPtr - 1) == '\n') {
- CurLineNum++;
- CurColNum = 0;
- } else
- CurColNum++;
return *CurPtr++;
}
@@ -195,44 +190,28 @@ const char *LLLexer::skipNChars(unsigned N) {
}
void LLLexer::advancePositionTo(const char *Ptr) {
- bool RecalculateColumn = false;
- while (CurPtr != Ptr) {
- if (CurPtr > Ptr) {
- --CurPtr;
- --CurColNum;
- // Since CurPtr is one char ahead of the stored position, check if the
- // previous char is not a newline
- if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') {
- --CurLineNum;
- RecalculateColumn = true;
- }
- } else
- getNextChar();
+ if (CurBuf.begin() > Ptr) {
+ CurPtr = CurBuf.begin();
+ return;
}
- if (RecalculateColumn) {
- CurColNum = 0;
- // Count the number of chars to the previous newline or start of buffer
- for (const char *Ptr = CurPtr; Ptr != CurBuf.begin() && *(Ptr - 1) != '\n';
- --Ptr, ++CurColNum)
- ;
+ if (CurBuf.end() < Ptr) {
+ CurPtr = CurBuf.end();
+ return;
}
+
+ CurPtr = Ptr;
}
lltok::Kind LLLexer::LexToken() {
// Set token end to next location, since the end is
// exclusive
- if (CurPtr != CurBuf.begin() && *(CurPtr - 1) == '\n') {
- PrevTokEndLineNum = CurLineNum + 1;
- PrevTokEndColNum = 0;
- } else {
- PrevTokEndLineNum = CurLineNum;
- PrevTokEndColNum = CurColNum + 1;
- }
+ std::tie(PrevTokEndLineNum, PrevTokEndColNum) =
+ SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
while (true) {
TokStart = CurPtr;
+ std::tie(CurTokLineNum, CurTokColNum) =
+ SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
int CurChar = getNextChar();
- CurTokColNum = CurColNum;
- CurTokLineNum = CurLineNum;
switch (CurChar) {
default:
>From b3d8254fadec29bde061f89dd74ef85e758419e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Tue, 2 Sep 2025 13:44:38 +0000
Subject: [PATCH 10/31] Fix zeroindexing on token positions
---
llvm/lib/AsmParser/LLLexer.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 0041cc5fd95fa..8ce963702f330 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -207,10 +207,14 @@ lltok::Kind LLLexer::LexToken() {
// exclusive
std::tie(PrevTokEndLineNum, PrevTokEndColNum) =
SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
+ --PrevTokEndLineNum;
+ --PrevTokEndColNum;
while (true) {
TokStart = CurPtr;
std::tie(CurTokLineNum, CurTokColNum) =
SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
+ --CurTokLineNum;
+ --CurTokColNum;
int CurChar = getNextChar();
switch (CurChar) {
>From 23dcc6b4d05c58dc359c5334fcba0061f92499be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 3 Sep 2025 13:21:54 +0000
Subject: [PATCH 11/31] Replace Line:Column storage with Pointers and on demand
conversion
---
llvm/include/llvm/AsmParser/LLLexer.h | 39 +++++++++++++--------------
llvm/include/llvm/IR/Value.h | 1 +
llvm/lib/AsmParser/LLLexer.cpp | 9 +------
llvm/lib/AsmParser/LLParser.cpp | 15 +++++------
4 files changed, 27 insertions(+), 37 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index bacf124d07d20..5e4d43ebbd4ed 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -17,27 +17,20 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
#include <string>
namespace llvm {
class Type;
class SMDiagnostic;
- class SourceMgr;
class LLVMContext;
class LLLexer {
const char *CurPtr;
StringRef CurBuf;
- // The line number of the start of the current token, zero-indexed
- unsigned CurTokLineNum = 0;
- // The column number of the start of the current token, zero-indexed
- unsigned CurTokColNum = 0;
- // The line number of the end of the current token, zero-indexed
- unsigned PrevTokEndLineNum = -1;
- // The column number of the end (exclusive) of the current token,
- // zero-indexed
- unsigned PrevTokEndColNum = -1;
+ // The end (exclusive) of the previous token
+ const char *PrevTokEnd = nullptr;
enum class ErrorPriority {
None, // No error message present.
@@ -87,16 +80,22 @@ namespace llvm {
IgnoreColonInIdentifiers = val;
}
- // Get the line number of the start of the current token, zero-indexed
- unsigned getTokLineNum() { return CurTokLineNum; }
- // Get the column number of the start of the current token, zero-indexed
- unsigned getTokColNum() { return CurTokColNum; }
- // Get the line number of the end of the previous token, zero-indexed,
- // exclusive
- unsigned getPrevTokEndLineNum() { return PrevTokEndLineNum; }
- // Get the column number of the end of the previous token, zero-indexed,
- // exclusive
- unsigned getPrevTokEndColNum() { return PrevTokEndColNum; }
+ // Get the line, column position of the start of the current token,
+ // zero-indexed
+ std::pair<unsigned, unsigned> getTokLineColumnPos() {
+ auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart));
+ --LC.first;
+ --LC.second;
+ return LC;
+ }
+ // Get the line, column position of the end of the previous token,
+ // zero-indexed, exclusive
+ std::pair<unsigned, unsigned> getPrevTokEndLineColumnPos() {
+ auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(PrevTokEnd));
+ --LC.first;
+ --LC.second;
+ return LC;
+ }
// This returns true as a convenience for the parser functions that return
// true on error.
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 9e27797d151e2..2617981cc090c 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -68,6 +68,7 @@ struct FileLoc {
}
FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {}
+ FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}
};
struct FileLocRange {
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 8ce963702f330..0e378bc81fd69 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -205,16 +205,9 @@ void LLLexer::advancePositionTo(const char *Ptr) {
lltok::Kind LLLexer::LexToken() {
// Set token end to next location, since the end is
// exclusive
- std::tie(PrevTokEndLineNum, PrevTokEndColNum) =
- SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
- --PrevTokEndLineNum;
- --PrevTokEndColNum;
+ PrevTokEnd = CurPtr;
while (true) {
TokStart = CurPtr;
- std::tie(CurTokLineNum, CurTokColNum) =
- SM.getLineAndColumn(SMLoc::getFromPointer(CurPtr));
- --CurTokLineNum;
- --CurTokColNum;
int CurChar = getNextChar();
switch (CurChar) {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 03fe1097f8612..65daaf5be318d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -740,7 +740,7 @@ bool LLParser::parseDeclare() {
/// ::= 'define' FunctionHeader (!dbg !56)* '{' ...
bool LLParser::parseDefine() {
assert(Lex.getKind() == lltok::kw_define);
- FileLoc FunctionStart(Lex.getTokLineNum(), Lex.getTokColNum());
+ FileLoc FunctionStart(Lex.getTokLineColumnPos());
Lex.Lex();
Function *F;
@@ -752,8 +752,7 @@ bool LLParser::parseDefine() {
parseFunctionBody(*F, FunctionNumber, UnnamedArgNums);
if (ParserContext)
ParserContext->addFunctionLocation(
- F, FileLocRange(FunctionStart, {Lex.getPrevTokEndLineNum(),
- Lex.getPrevTokEndColNum()}));
+ F, FileLocRange(FunctionStart, Lex.getPrevTokEndLineColumnPos()));
return RetValue;
}
@@ -6959,7 +6958,7 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber,
/// parseBasicBlock
/// ::= (LabelStr|LabelID)? Instruction*
bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
- FileLoc BBStart(Lex.getTokLineNum(), Lex.getTokColNum());
+ FileLoc BBStart(Lex.getTokLineColumnPos());
// If this basic block starts out with a name, remember it.
std::string Name;
@@ -7002,7 +7001,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
TrailingDbgRecord.emplace_back(DR, DeleteDbgRecord);
}
- FileLoc InstStart(Lex.getTokLineNum(), Lex.getTokColNum());
+ FileLoc InstStart(Lex.getTokLineColumnPos());
// This instruction may have three possibilities for a name: a) none
// specified, b) name specified "%foo =", c) number specified: "%4 =".
LocTy NameLoc = Lex.getLoc();
@@ -7054,15 +7053,13 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
TrailingDbgRecord.clear();
if (ParserContext) {
ParserContext->addInstructionLocation(
- Inst, FileLocRange(InstStart, {Lex.getPrevTokEndLineNum(),
- Lex.getPrevTokEndColNum()}));
+ Inst, FileLocRange(InstStart, Lex.getPrevTokEndLineColumnPos()));
}
} while (!Inst->isTerminator());
if (ParserContext)
ParserContext->addBlockLocation(
- BB, FileLocRange(BBStart, {Lex.getPrevTokEndLineNum(),
- Lex.getPrevTokEndColNum()}));
+ BB, FileLocRange(BBStart, Lex.getPrevTokEndLineColumnPos()));
assert(TrailingDbgRecord.empty() &&
"All debug values should have been attached to an instruction.");
>From 06d526544de7973b5abf4779c08bc2e45c444983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Thu, 4 Sep 2025 09:11:51 +0000
Subject: [PATCH 12/31] Use nullptr as missing value
---
.../include/llvm/AsmParser/AsmParserContext.h | 31 ++++++++++++++-----
llvm/lib/AsmParser/AsmParserContext.cpp | 20 ++++++------
2 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 39c0e0d9df4de..092485d23437d 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -36,13 +36,30 @@ class AsmParserContext {
std::optional<FileLocRange> getFunctionLocation(const Function *) const;
std::optional<FileLocRange> getBlockLocation(const BasicBlock *) const;
std::optional<FileLocRange> getInstructionLocation(const Instruction *) const;
- std::optional<Function *> getFunctionAtLocation(const FileLocRange &) const;
- std::optional<Function *> getFunctionAtLocation(const FileLoc &) const;
- std::optional<BasicBlock *> getBlockAtLocation(const FileLocRange &) const;
- std::optional<BasicBlock *> getBlockAtLocation(const FileLoc &) const;
- std::optional<Instruction *>
- getInstructionAtLocation(const FileLocRange &) const;
- std::optional<Instruction *> getInstructionAtLocation(const FileLoc &) const;
+ /// Get the function at the requested location range.
+ /// If no single function occupies the queried range, or the record is
+ /// missing, a nullptr is returned.
+ Function *getFunctionAtLocation(const FileLocRange &) const;
+ /// Get the function at the requested location.
+ /// If no function occupies the queried location, or the record is missing, a
+ /// nullptr is returned.
+ Function *getFunctionAtLocation(const FileLoc &) const;
+ /// Get the block at the requested location range.
+ /// If no single block occupies the queried range, or the record is missing, a
+ /// nullptr is returned.
+ BasicBlock *getBlockAtLocation(const FileLocRange &) const;
+ /// Get the block at the requested location.
+ /// If no block occupies the queried location, or the record is missing, a
+ /// nullptr is returned.
+ BasicBlock *getBlockAtLocation(const FileLoc &) const;
+ /// Get the instruction at the requested location range.
+ /// If no single instruction occupies the queried range, or the record is
+ /// missing, a nullptr is returned.
+ Instruction *getInstructionAtLocation(const FileLocRange &) const;
+ /// Get the instruction at the requested location.
+ /// If no instruction occupies the queried location, or the record is missing,
+ /// a nullptr is returned.
+ Instruction *getInstructionAtLocation(const FileLoc &) const;
bool addFunctionLocation(Function *, const FileLocRange &);
bool addBlockLocation(BasicBlock *, const FileLocRange &);
bool addInstructionLocation(Instruction *, const FileLocRange &);
diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp
index f5e3d83f5d346..7de2bfc67acfb 100644
--- a/llvm/lib/AsmParser/AsmParserContext.cpp
+++ b/llvm/lib/AsmParser/AsmParserContext.cpp
@@ -31,44 +31,42 @@ AsmParserContext::getInstructionLocation(const Instruction *I) const {
return Instructions.at(I);
}
-std::optional<Function *>
+Function *
AsmParserContext::getFunctionAtLocation(const FileLocRange &Query) const {
for (auto &[F, Loc] : Functions) {
if (Loc.contains(Query))
return F;
}
- return std::nullopt;
+ return nullptr;
}
-std::optional<Function *>
-AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const {
+Function *AsmParserContext::getFunctionAtLocation(const FileLoc &Query) const {
return getFunctionAtLocation(FileLocRange(Query, Query));
}
-std::optional<BasicBlock *>
+BasicBlock *
AsmParserContext::getBlockAtLocation(const FileLocRange &Query) const {
for (auto &[BB, Loc] : Blocks) {
if (Loc.contains(Query))
return BB;
}
- return std::nullopt;
+ return nullptr;
}
-std::optional<BasicBlock *>
-AsmParserContext::getBlockAtLocation(const FileLoc &Query) const {
+BasicBlock *AsmParserContext::getBlockAtLocation(const FileLoc &Query) const {
return getBlockAtLocation(FileLocRange(Query, Query));
}
-std::optional<Instruction *>
+Instruction *
AsmParserContext::getInstructionAtLocation(const FileLocRange &Query) const {
for (auto &[I, Loc] : Instructions) {
if (Loc.contains(Query))
return I;
}
- return std::nullopt;
+ return nullptr;
}
-std::optional<Instruction *>
+Instruction *
AsmParserContext::getInstructionAtLocation(const FileLoc &Query) const {
return getInstructionAtLocation(FileLocRange(Query, Query));
}
>From 4e08921093d0031fa135f5ffff55ff6beabe9e3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Thu, 4 Sep 2025 09:12:32 +0000
Subject: [PATCH 13/31] Enclose debug prints of tests in LLVM_DEBUG
---
llvm/unittests/AsmParser/AsmParserTest.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 49f59696709f2..700864c2d12b9 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
+#define DEBUG_TYPE "Unittest-asm-parser-tests"
+
using namespace llvm;
namespace {
@@ -486,14 +488,14 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) {
#define ASSERT_EQ_LOC(Loc1, Loc2) \
do { \
bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \
- if (!AreLocsEqual) { \
+ LLVM_DEBUG(if (!AreLocsEqual) { \
dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \
<< Loc1.Start.Col << " - " << Loc1.End.Line << ":" \
<< Loc1.End.Col << "\n"; \
dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \
<< Loc2.Start.Col << " - " << Loc2.End.Line << ":" \
<< Loc2.End.Col << "\n"; \
- } \
+ }); \
ASSERT_TRUE(AreLocsEqual); \
} while (false)
>From 3da9e9db2de1dfcea062522eb2ebdcd7d2eba715 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Mon, 15 Sep 2025 11:42:06 +0000
Subject: [PATCH 14/31] Decapitalize DEBUG_TYPE
---
llvm/unittests/AsmParser/AsmParserTest.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 700864c2d12b9..0ca21eaea800f 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
-#define DEBUG_TYPE "Unittest-asm-parser-tests"
+#define DEBUG_TYPE "unittest-asm-parser-tests"
using namespace llvm;
>From 4b3bc0ee923a1c402d1279696d191f1907c51d11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Fri, 26 Sep 2025 10:11:24 +0000
Subject: [PATCH 15/31] Move FileLoc from Value.h to FileLoc.h
---
.../include/llvm/AsmParser/AsmParserContext.h | 1 +
llvm/include/llvm/AsmParser/FileLoc.h | 48 +++++++++++++++++++
llvm/include/llvm/IR/Value.h | 33 -------------
3 files changed, 49 insertions(+), 33 deletions(-)
create mode 100644 llvm/include/llvm/AsmParser/FileLoc.h
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 092485d23437d..0bc383ff147fd 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -9,6 +9,7 @@
#ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H
#define LLVM_ASMPARSER_ASMPARSER_STATE_H
+#include "FileLoc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Value.h"
#include <optional>
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
new file mode 100644
index 0000000000000..66fd4f21df9c0
--- /dev/null
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -0,0 +1,48 @@
+//===-- FileLoc.h ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_FILELOC_H
+#define LLVM_ASMPARSER_FILELOC_H
+
+#include <assert.h>
+#include <utility>
+
+struct FileLoc {
+ unsigned Line;
+ unsigned Col;
+
+ bool operator<=(const FileLoc &RHS) const {
+ return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col);
+ }
+
+ bool operator<(const FileLoc &RHS) const {
+ return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col);
+ }
+
+ FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {}
+ FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}
+};
+
+struct FileLocRange {
+ FileLoc Start;
+ FileLoc End;
+
+ FileLocRange() : Start(0, 0), End(0, 0) {}
+
+ FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) {
+ assert(Start <= End);
+ }
+
+ bool contains(FileLoc L) const { return Start <= L && L <= End; }
+
+ bool contains(FileLocRange LR) const {
+ return contains(LR.Start) && contains(LR.End);
+ }
+};
+
+#endif
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 2617981cc090c..04d0391c04098 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -55,39 +55,6 @@ class User;
using ValueName = StringMapEntry<Value *>;
-struct FileLoc {
- unsigned Line;
- unsigned Col;
-
- bool operator<=(const FileLoc &RHS) const {
- return Line < RHS.Line || (Line == RHS.Line && Col <= RHS.Col);
- }
-
- bool operator<(const FileLoc &RHS) const {
- return Line < RHS.Line || (Line == RHS.Line && Col < RHS.Col);
- }
-
- FileLoc(unsigned L, unsigned C) : Line(L), Col(C) {}
- FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}
-};
-
-struct FileLocRange {
- FileLoc Start;
- FileLoc End;
-
- FileLocRange() : Start(0, 0), End(0, 0) {}
-
- FileLocRange(FileLoc S, FileLoc E) : Start(S), End(E) {
- assert(Start <= End);
- }
-
- bool contains(FileLoc L) const { return Start <= L && L <= End; }
-
- bool contains(FileLocRange LR) const {
- return contains(LR.Start) && contains(LR.End);
- }
-};
-
//===----------------------------------------------------------------------===//
// Value Class
//===----------------------------------------------------------------------===//
>From ed7a04a5c6ab05963bdde3285e663280a78434c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Fri, 26 Sep 2025 10:23:44 +0000
Subject: [PATCH 16/31] Rename include guard defines to reflect filename
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 0bc383ff147fd..eb4b9c4013b9a 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ASMPARSER_ASMPARSER_STATE_H
-#define LLVM_ASMPARSER_ASMPARSER_STATE_H
+#ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H
+#define LLVM_ASMPARSER_ASMPARSERCONTEXT_H
#include "FileLoc.h"
#include "llvm/ADT/DenseMap.h"
>From e6142b5bd2b768d65ee1215e1add309ddeff197e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 1 Oct 2025 12:14:41 +0000
Subject: [PATCH 17/31] include in namespace llvm
---
llvm/include/llvm/AsmParser/FileLoc.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
index 66fd4f21df9c0..d209fd56f9bfb 100644
--- a/llvm/include/llvm/AsmParser/FileLoc.h
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -12,6 +12,8 @@
#include <assert.h>
#include <utility>
+namespace llvm {
+
struct FileLoc {
unsigned Line;
unsigned Col;
@@ -45,4 +47,6 @@ struct FileLocRange {
}
};
+} // namespace llvm
+
#endif
>From f5da73c5c3df56034de8caeb7fd4aea256c8c040 Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Mon, 6 Oct 2025 15:20:10 +0200
Subject: [PATCH 18/31] Fix typo in comment
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index eb4b9c4013b9a..95d8ab6c8cafe 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -16,7 +16,7 @@
namespace llvm {
-/// Registry of file location information for LLVM IR constructs
+/// Registry of file location information for LLVM IR constructs.
///
/// This class provides access to the file location information
/// for various LLVM IR constructs. Currently, it supports Function,
>From 10a2b75948159f5482103b402602fba0a85e332f Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Mon, 6 Oct 2025 15:22:11 +0200
Subject: [PATCH 19/31] Path to llvm/AsmParser/FileLoc.h
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 95d8ab6c8cafe..54c0de0f1fcf7 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -9,7 +9,7 @@
#ifndef LLVM_ASMPARSER_ASMPARSERCONTEXT_H
#define LLVM_ASMPARSER_ASMPARSERCONTEXT_H
-#include "FileLoc.h"
+#include "llvm/AsmParser/FileLoc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Value.h"
#include <optional>
>From 17b5753f286472c28a4fd6e5374d6c0b397058bb Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Mon, 6 Oct 2025 15:22:43 +0200
Subject: [PATCH 20/31] assert.h -> cassert
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/FileLoc.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
index d209fd56f9bfb..0a7045a089c95 100644
--- a/llvm/include/llvm/AsmParser/FileLoc.h
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -9,7 +9,7 @@
#ifndef LLVM_ASMPARSER_FILELOC_H
#define LLVM_ASMPARSER_FILELOC_H
-#include <assert.h>
+#include <cassert>
#include <utility>
namespace llvm {
>From 737c5e02ed1f1350f9c1121ecc522f15719a832f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 08:48:42 +0000
Subject: [PATCH 21/31] Remove filename and emacs marker
---
llvm/include/llvm/AsmParser/AsmParserContext.h | 2 +-
llvm/lib/AsmParser/AsmParserContext.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/AsmParserContext.h b/llvm/include/llvm/AsmParser/AsmParserContext.h
index 54c0de0f1fcf7..89b1626c48b6d 100644
--- a/llvm/include/llvm/AsmParser/AsmParserContext.h
+++ b/llvm/include/llvm/AsmParser/AsmParserContext.h
@@ -1,4 +1,4 @@
-//===-- AsmParserContext.h --------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp
index 7de2bfc67acfb..43f33652efc28 100644
--- a/llvm/lib/AsmParser/AsmParserContext.cpp
+++ b/llvm/lib/AsmParser/AsmParserContext.cpp
@@ -1,4 +1,4 @@
-//===-- AsmParserContext.cpp ------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 72b89e5a4aba84d1ab0f2c648accd1d0e77f3dea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 09:22:11 +0000
Subject: [PATCH 22/31] optimize lookup
---
llvm/lib/AsmParser/AsmParserContext.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/AsmParser/AsmParserContext.cpp b/llvm/lib/AsmParser/AsmParserContext.cpp
index 43f33652efc28..59d3ffcb470e4 100644
--- a/llvm/lib/AsmParser/AsmParserContext.cpp
+++ b/llvm/lib/AsmParser/AsmParserContext.cpp
@@ -12,23 +12,23 @@ namespace llvm {
std::optional<FileLocRange>
AsmParserContext::getFunctionLocation(const Function *F) const {
- if (!Functions.contains(F))
- return std::nullopt;
- return Functions.at(F);
+ if (auto FIt = Functions.find(F); FIt != Functions.end())
+ return FIt->second;
+ return std::nullopt;
}
std::optional<FileLocRange>
AsmParserContext::getBlockLocation(const BasicBlock *BB) const {
- if (!Blocks.contains(BB))
- return std::nullopt;
- return Blocks.at(BB);
+ if (auto BBIt = Blocks.find(BB); BBIt != Blocks.end())
+ return BBIt->second;
+ return std::nullopt;
}
std::optional<FileLocRange>
AsmParserContext::getInstructionLocation(const Instruction *I) const {
- if (!Instructions.contains(I))
- return std::nullopt;
- return Instructions.at(I);
+ if (auto IIt = Instructions.find(I); IIt != Instructions.end())
+ return IIt->second;
+ return std::nullopt;
}
Function *
>From 41284dfb8403dff7681ce7c4222b8211fa773ce9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 09:50:59 +0000
Subject: [PATCH 23/31] FileLoc docs and fix range
---
llvm/include/llvm/AsmParser/FileLoc.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
index 0a7045a089c95..155780a076587 100644
--- a/llvm/include/llvm/AsmParser/FileLoc.h
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -14,8 +14,11 @@
namespace llvm {
+/// Struct holding Line:Column location
struct FileLoc {
+ // 0-based line number
unsigned Line;
+ // 0-based column number
unsigned Col;
bool operator<=(const FileLoc &RHS) const {
@@ -30,6 +33,7 @@ struct FileLoc {
FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}
};
+// Struct holding a semiopen range [Start; End)
struct FileLocRange {
FileLoc Start;
FileLoc End;
@@ -43,7 +47,7 @@ struct FileLocRange {
bool contains(FileLoc L) const { return Start <= L && L <= End; }
bool contains(FileLocRange LR) const {
- return contains(LR.Start) && contains(LR.End);
+ return Start <= LR.Start && LR.End <= End;
}
};
>From ff9a33d4665b2615a0f49a3cc6de865f1ebe6ef5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 09:54:29 +0000
Subject: [PATCH 24/31] full path to includes
---
llvm/include/llvm/AsmParser/LLLexer.h | 2 +-
llvm/include/llvm/AsmParser/LLParser.h | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 5e4d43ebbd4ed..de2c44da9f9d3 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -13,9 +13,9 @@
#ifndef LLVM_ASMPARSER_LLLEXER_H
#define LLVM_ASMPARSER_LLLEXER_H
-#include "LLToken.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/AsmParser/LLToken.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include <string>
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 02460e5e52203..dd3360c022829 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -13,9 +13,9 @@
#ifndef LLVM_ASMPARSER_LLPARSER_H
#define LLVM_ASMPARSER_LLPARSER_H
-#include "AsmParserContext.h"
-#include "LLLexer.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/AsmParser/AsmParserContext.h"
+#include "llvm/AsmParser/LLLexer.h"
#include "llvm/AsmParser/NumberedValues.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Attributes.h"
>From 008ae63dff7a6a6cf01190d154d655790f8f18b4 Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Wed, 8 Oct 2025 12:45:44 +0200
Subject: [PATCH 25/31] Apply suggestion from @nikic
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/LLLexer.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index de2c44da9f9d3..c38bf5d148ed2 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -84,9 +84,7 @@ namespace llvm {
// zero-indexed
std::pair<unsigned, unsigned> getTokLineColumnPos() {
auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart));
- --LC.first;
- --LC.second;
- return LC;
+ return {LC.first - 1, LC.second - 1};
}
// Get the line, column position of the end of the previous token,
// zero-indexed exclusive
>From a44ef20c291a25df7705bf2a9abdbb221dc7a5ce Mon Sep 17 00:00:00 2001
From: Bertik23 <39457484+Bertik23 at users.noreply.github.com>
Date: Wed, 8 Oct 2025 12:51:24 +0200
Subject: [PATCH 26/31] Typo add period
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/include/llvm/AsmParser/LLParser.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index dd3360c022829..9eb31d7e0a451 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -178,7 +178,7 @@ namespace llvm {
// Map of module ID to path.
std::map<unsigned, StringRef> ModuleIdMap;
- /// Keeps track of source locations for Values, BasicBlocks, and Functions
+ /// Keeps track of source locations for Values, BasicBlocks, and Functions.
AsmParserContext *ParserContext;
/// Only the llvm-as tool may set this to false to bypass
>From f201d1f4e1dbeaefb7fa8764272755a3c3919891 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 10:26:33 +0000
Subject: [PATCH 27/31] actually fix filelocrange openness
---
llvm/include/llvm/AsmParser/FileLoc.h | 2 +-
llvm/unittests/AsmParser/AsmParserTest.cpp | 17 ++++++-----------
2 files changed, 7 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
index 155780a076587..d1b22f5187e75 100644
--- a/llvm/include/llvm/AsmParser/FileLoc.h
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -44,7 +44,7 @@ struct FileLocRange {
assert(Start <= End);
}
- bool contains(FileLoc L) const { return Start <= L && L <= End; }
+ bool contains(FileLoc L) const { return Start <= L && L < End; }
bool contains(FileLocRange LR) const {
return Start <= LR.Start && LR.End <= End;
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 0ca21eaea800f..32ca4b2c29493 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -487,16 +487,11 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) {
#define ASSERT_EQ_LOC(Loc1, Loc2) \
do { \
- bool AreLocsEqual = Loc1.contains(Loc2) && Loc2.contains(Loc1); \
- LLVM_DEBUG(if (!AreLocsEqual) { \
- dbgs() << #Loc1 " location: " << Loc1.Start.Line << ":" \
- << Loc1.Start.Col << " - " << Loc1.End.Line << ":" \
- << Loc1.End.Col << "\n"; \
- dbgs() << #Loc2 " location: " << Loc2.Start.Line << ":" \
- << Loc2.Start.Col << " - " << Loc2.End.Line << ":" \
- << Loc2.End.Col << "\n"; \
- }); \
- ASSERT_TRUE(AreLocsEqual); \
+ EXPECT_TRUE(Loc1.contains(Loc2) && Loc2.contains(Loc1)) \
+ << #Loc1 " location: " << Loc1.Start.Line << ":" << Loc1.Start.Col \
+ << " - " << Loc1.End.Line << ":" << Loc1.End.Col << "\n" \
+ << #Loc2 " location: " << Loc2.Start.Line << ":" << Loc2.Start.Col \
+ << " - " << Loc2.End.Line << ":" << Loc2.End.Col << "\n"; \
} while (false)
TEST(AsmParserTest, ParserObjectLocations) {
@@ -517,7 +512,7 @@ TEST(AsmParserTest, ParserObjectLocations) {
ASSERT_TRUE(MainFn != nullptr);
auto MaybeMainLoc = ParserContext.getFunctionLocation(MainFn);
- ASSERT_TRUE(MaybeMainLoc.has_value());
+ EXPECT_TRUE(MaybeMainLoc.has_value());
auto MainLoc = MaybeMainLoc.value();
auto ExpectedMainLoc = FileLocRange(FileLoc{0, 0}, FileLoc{4, 1});
ASSERT_EQ_LOC(MainLoc, ExpectedMainLoc);
>From 1de2447dbd77bcd4cd920b6409da72753743484b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 10:37:18 +0000
Subject: [PATCH 28/31] remove old irrelevant comment
---
llvm/unittests/AsmParser/AsmParserTest.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 32ca4b2c29493..898a8293925b6 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -495,8 +495,6 @@ TEST(AsmParserTest, DIExpressionBodyAtBeginningWithSlotMappingParsing) {
} while (false)
TEST(AsmParserTest, ParserObjectLocations) {
- // Expected to fail with function location starting one character later, needs
- // a fix
StringRef Source = "define i32 @main() {\n"
"entry:\n"
" %a = add i32 1, 2\n"
>From 4d5183995957e289d0b2b3e14e71890e1e375993 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 10:53:11 +0000
Subject: [PATCH 29/31] Doc comments with ///
---
llvm/include/llvm/AsmParser/LLLexer.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index c38bf5d148ed2..4801a61c72348 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -29,7 +29,7 @@ namespace llvm {
const char *CurPtr;
StringRef CurBuf;
- // The the end (exclusive) of the current token
+ /// The end (exclusive) of the previous token.
const char *PrevTokEnd = nullptr;
enum class ErrorPriority {
@@ -80,14 +80,14 @@ namespace llvm {
IgnoreColonInIdentifiers = val;
}
- // Get the line, column position of the start of the current token,
- // zero-indexed
+ /// Get the line, column position of the start of the current token,
+ /// zero-indexed
std::pair<unsigned, unsigned> getTokLineColumnPos() {
auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(TokStart));
return {LC.first - 1, LC.second - 1};
}
- // Get the line, column position of the end of the previous token,
- // zero-indexed exclusive
+ /// Get the line, column position of the end of the previous token,
+ /// zero-indexed exclusive
std::pair<unsigned, unsigned> getPrevTokEndLineColumnPos() {
auto LC = SM.getLineAndColumn(SMLoc::getFromPointer(PrevTokEnd));
--LC.first;
>From 77385c085357f7b130abb056eb90987bc2bb83c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 10:56:11 +0000
Subject: [PATCH 30/31] Doc comments with ///
---
llvm/include/llvm/AsmParser/FileLoc.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/FileLoc.h b/llvm/include/llvm/AsmParser/FileLoc.h
index d1b22f5187e75..02c1849fa986e 100644
--- a/llvm/include/llvm/AsmParser/FileLoc.h
+++ b/llvm/include/llvm/AsmParser/FileLoc.h
@@ -16,9 +16,9 @@ namespace llvm {
/// Struct holding Line:Column location
struct FileLoc {
- // 0-based line number
+ /// 0-based line number
unsigned Line;
- // 0-based column number
+ /// 0-based column number
unsigned Col;
bool operator<=(const FileLoc &RHS) const {
@@ -33,7 +33,7 @@ struct FileLoc {
FileLoc(std::pair<unsigned, unsigned> LC) : Line(LC.first), Col(LC.second) {}
};
-// Struct holding a semiopen range [Start; End)
+/// Struct holding a semiopen range [Start; End)
struct FileLocRange {
FileLoc Start;
FileLoc End;
>From 75e5b57d7a5b8c0f1b7f13e00d00d090610bdf51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Albert=20Havli=C4=8Dek?= <ahavlicek at azul.com>
Date: Wed, 8 Oct 2025 11:14:14 +0000
Subject: [PATCH 31/31] Revert changes irrelevant in LLLexer
---
llvm/include/llvm/AsmParser/LLLexer.h | 5 -
llvm/lib/AsmParser/LLLexer.cpp | 141 ++++++++++++--------------
2 files changed, 64 insertions(+), 82 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 4801a61c72348..8f0ae6989d7d4 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -109,12 +109,7 @@ namespace llvm {
private:
lltok::Kind LexToken();
- // Return closest pointer after `Ptr` that is an end of a label.
- // Returns nullptr if `Ptr` doesn't point into a label.
- const char *getLabelTail(const char *Ptr);
int getNextChar();
- const char *skipNChars(unsigned N);
- void advancePositionTo(const char *Ptr);
void SkipLineComment();
bool SkipCComment();
lltok::Kind ReadString(lltok::Kind kind);
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 585fc0c0fcaad..10ab4b658553c 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -155,6 +155,17 @@ static bool isLabelChar(char C) {
C == '.' || C == '_';
}
+/// isLabelTail - Return true if this pointer points to a valid end of a label.
+static const char *isLabelTail(const char *CurPtr) {
+ while (true) {
+ if (CurPtr[0] == ':')
+ return CurPtr + 1;
+ if (!isLabelChar(CurPtr[0]))
+ return nullptr;
+ ++CurPtr;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//
@@ -165,41 +176,21 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
CurPtr = CurBuf.begin();
}
-const char *LLLexer::getLabelTail(const char *Ptr) {
- while (Ptr != CurBuf.end()) {
- if (Ptr[0] == ':')
- return Ptr + 1;
- if (!isLabelChar(Ptr[0]))
- return nullptr;
- ++Ptr;
- }
- return nullptr;
-}
-
int LLLexer::getNextChar() {
- if (CurPtr == CurBuf.end())
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr - 1 != CurBuf.end())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
return EOF;
- // Increment line number if this is the first character after a newline
- return *CurPtr++;
-}
-
-const char *LLLexer::skipNChars(unsigned N) {
- while (N--)
- getNextChar();
- return CurPtr;
-}
-
-void LLLexer::advancePositionTo(const char *Ptr) {
- if (CurBuf.begin() > Ptr) {
- CurPtr = CurBuf.begin();
- return;
}
- if (CurBuf.end() < Ptr) {
- CurPtr = CurBuf.end();
- return;
- }
-
- CurPtr = Ptr;
}
lltok::Kind LLLexer::LexToken() {
@@ -208,8 +199,8 @@ lltok::Kind LLLexer::LexToken() {
PrevTokEnd = CurPtr;
while (true) {
TokStart = CurPtr;
- int CurChar = getNextChar();
+ int CurChar = getNextChar();
switch (CurChar) {
default:
// Handle letters: [a-zA-Z_]
@@ -230,13 +221,13 @@ lltok::Kind LLLexer::LexToken() {
case '%': return LexPercent();
case '"': return LexQuote();
case '.':
- if (const char *Ptr = getLabelTail(CurPtr)) {
- advancePositionTo(Ptr);
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
StrVal.assign(TokStart, CurPtr-1);
return lltok::LabelStr;
}
if (CurPtr[0] == '.' && CurPtr[1] == '.') {
- skipNChars(2);
+ CurPtr += 2;
return lltok::dotdotdot;
}
return lltok::Error;
@@ -313,15 +304,15 @@ lltok::Kind LLLexer::LexAt() {
}
lltok::Kind LLLexer::LexDollar() {
- if (const char *Ptr = getLabelTail(TokStart)) {
- advancePositionTo(Ptr);
+ if (const char *Ptr = isLabelTail(TokStart)) {
+ CurPtr = Ptr;
StrVal.assign(TokStart, CurPtr - 1);
return lltok::LabelStr;
}
// Handle DollarStringConstant: $\"[^\"]*\"
if (CurPtr[0] == '"') {
- getNextChar();
+ ++CurPtr;
while (true) {
int CurChar = getNextChar();
@@ -373,11 +364,11 @@ bool LLLexer::ReadVarName() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_') {
- getNextChar();
+ ++CurPtr;
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_')
- getNextChar();
+ ++CurPtr;
StrVal.assign(NameStart, CurPtr);
return true;
@@ -391,8 +382,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
return lltok::Error;
- for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
- getNextChar())
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
uint64_t Val = atoull(TokStart + 1, CurPtr);
@@ -405,7 +395,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
// Handle StringConstant: \"[^\"]*\"
if (CurPtr[0] == '"') {
- getNextChar();
+ ++CurPtr;
while (true) {
int CurChar = getNextChar();
@@ -451,7 +441,7 @@ lltok::Kind LLLexer::LexQuote() {
return kind;
if (CurPtr[0] == ':') {
- getNextChar();
+ ++CurPtr;
if (StringRef(StrVal).contains(0)) {
LexError("NUL character is not allowed in names");
kind = lltok::Error;
@@ -471,11 +461,11 @@ lltok::Kind LLLexer::LexExclaim() {
if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
- getNextChar();
+ ++CurPtr;
while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
- getNextChar();
+ ++CurPtr;
StrVal.assign(TokStart+1, CurPtr); // Skip !
UnEscapeLexed(StrVal);
@@ -511,7 +501,7 @@ lltok::Kind LLLexer::LexIdentifier() {
const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
const char *KeywordEnd = nullptr;
- for (; isLabelChar(*CurPtr); getNextChar()) {
+ for (; isLabelChar(*CurPtr); ++CurPtr) {
// If we decide this is an integer, remember the end of the sequence.
if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
IntEnd = CurPtr;
@@ -523,8 +513,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// If we stopped due to a colon, unless we were directed to ignore it,
// this really is a label.
if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
- StrVal.assign(StartChar - 1, CurPtr);
- getNextChar();
+ StrVal.assign(StartChar - 1, CurPtr++);
return lltok::LabelStr;
}
@@ -532,7 +521,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// return it.
if (!IntEnd) IntEnd = CurPtr;
if (IntEnd != StartChar) {
- advancePositionTo(IntEnd);
+ CurPtr = IntEnd;
uint64_t NumBits = atoull(StartChar, CurPtr);
if (NumBits < IntegerType::MIN_INT_BITS ||
NumBits > IntegerType::MAX_INT_BITS) {
@@ -545,7 +534,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// Otherwise, this was a letter sequence. See which keyword this is.
if (!KeywordEnd) KeywordEnd = CurPtr;
- advancePositionTo(KeywordEnd);
+ CurPtr = KeywordEnd;
--StartChar;
StringRef Keyword(StartChar, CurPtr - StartChar);
@@ -1063,7 +1052,7 @@ lltok::Kind LLLexer::LexIdentifier() {
StringRef HexStr(TokStart + 3, len);
if (!all_of(HexStr, isxdigit)) {
// Bad token, return it as an error.
- advancePositionTo(TokStart + 3);
+ CurPtr = TokStart + 3;
return lltok::Error;
}
APInt Tmp(bits, HexStr, 16);
@@ -1076,12 +1065,12 @@ lltok::Kind LLLexer::LexIdentifier() {
// If this is "cc1234", return this as just "cc".
if (TokStart[0] == 'c' && TokStart[1] == 'c') {
- advancePositionTo(TokStart + 2);
+ CurPtr = TokStart + 2;
return lltok::kw_cc;
}
// Finally, if this isn't known, return an error.
- advancePositionTo(TokStart + 1);
+ CurPtr = TokStart + 1;
return lltok::Error;
}
@@ -1094,25 +1083,24 @@ lltok::Kind LLLexer::LexIdentifier() {
/// HexHalfConstant 0xH[0-9A-Fa-f]+
/// HexBFloatConstant 0xR[0-9A-Fa-f]+
lltok::Kind LLLexer::Lex0x() {
- advancePositionTo(TokStart + 2);
+ CurPtr = TokStart + 2;
char Kind;
if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
CurPtr[0] == 'R') {
- Kind = *CurPtr;
- getNextChar();
+ Kind = *CurPtr++;
} else {
Kind = 'J';
}
if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Bad token, return it as an error.
- advancePositionTo(TokStart + 1);
+ CurPtr = TokStart + 1;
return lltok::Error;
}
while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
- getNextChar();
+ ++CurPtr;
if (Kind == 'J') {
// HexFPConstant - Floating point constant represented in IEEE format as a
@@ -1167,9 +1155,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
!isdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Okay, this is not a number after the -, it's probably a label.
- if (const char *End = getLabelTail(CurPtr)) {
+ if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- advancePositionTo(End);
+ CurPtr = End;
return lltok::LabelStr;
}
@@ -1179,13 +1167,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// At this point, it is either a label, int or fp constant.
// Skip digits, we have at least one.
- for (; isdigit(static_cast<unsigned char>(CurPtr[0])); getNextChar())
+ for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
// Check if this is a fully-numeric label:
if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
uint64_t Val = atoull(TokStart, CurPtr);
- getNextChar(); // Skip the colon.
+ ++CurPtr; // Skip the colon.
if ((unsigned)Val != Val)
LexError("invalid value number (too large)");
UIntVal = unsigned(Val);
@@ -1194,9 +1182,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// Check to see if this really is a string label, e.g. "-1:".
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
- if (const char *End = getLabelTail(CurPtr)) {
+ if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
- advancePositionTo(End);
+ CurPtr = End;
return lltok::LabelStr;
}
}
@@ -1210,19 +1198,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
return lltok::APSInt;
}
- getNextChar();
+ ++CurPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
while (isdigit(static_cast<unsigned char>(CurPtr[0])))
- getNextChar();
+ ++CurPtr;
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- skipNChars(2);
+ CurPtr += 2;
while (isdigit(static_cast<unsigned char>(CurPtr[0])))
- getNextChar();
+ ++CurPtr;
}
}
@@ -1240,29 +1228,28 @@ lltok::Kind LLLexer::LexPositive() {
return lltok::Error;
// Skip digits.
- for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
- getNextChar())
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
// At this point, we need a '.'.
if (CurPtr[0] != '.') {
- advancePositionTo(TokStart + 1);
+ CurPtr = TokStart + 1;
return lltok::Error;
}
- getNextChar();
+ ++CurPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
while (isdigit(static_cast<unsigned char>(CurPtr[0])))
- getNextChar();
+ ++CurPtr;
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
((CurPtr[1] == '-' || CurPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(CurPtr[2])))) {
- skipNChars(2);
+ CurPtr += 2;
while (isdigit(static_cast<unsigned char>(CurPtr[0])))
- getNextChar();
+ ++CurPtr;
}
}
More information about the llvm-commits
mailing list