[clang] [clang] Clarify SourceLocation and (Char)SourceRange docs (PR #177400)
Tobias Ribizel via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 3 11:46:23 PST 2026
https://github.com/upsj updated https://github.com/llvm/llvm-project/pull/177400
>From 9615e09e8a61a564c2fbaa31967f505590c6beaa Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <mail at ribizel.de>
Date: Thu, 22 Jan 2026 17:41:44 +0100
Subject: [PATCH 1/4] [clang] format SourceLocation.h
---
clang/include/clang/Basic/SourceLocation.h | 133 ++++++++++-----------
1 file changed, 61 insertions(+), 72 deletions(-)
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index bd0038d5ae1ae..aab021ad7b610 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -104,7 +104,7 @@ class SourceLocation {
enum : UIntTy { MacroIDBit = 1ULL << (8 * sizeof(UIntTy) - 1) };
public:
- bool isFileID() const { return (ID & MacroIDBit) == 0; }
+ bool isFileID() const { return (ID & MacroIDBit) == 0; }
bool isMacroID() const { return (ID & MacroIDBit) != 0; }
/// Return true if this is a valid SourceLocation object.
@@ -137,9 +137,9 @@ class SourceLocation {
/// Return a source location with the specified offset from this
/// SourceLocation.
SourceLocation getLocWithOffset(IntTy Offset) const {
- assert(((getOffset()+Offset) & MacroIDBit) == 0 && "offset overflow");
+ assert(((getOffset() + Offset) & MacroIDBit) == 0 && "offset overflow");
SourceLocation L;
- L.ID = ID+Offset;
+ L.ID = ID + Offset;
return L;
}
@@ -165,10 +165,10 @@ class SourceLocation {
///
/// This should only be passed to SourceLocation::getFromPtrEncoding, it
/// should not be inspected directly.
- void* getPtrEncoding() const {
+ void *getPtrEncoding() const {
// Double cast to avoid a warning "cast to pointer from integer of different
// size".
- return (void*)(uintptr_t)getRawEncoding();
+ return (void *)(uintptr_t)getRawEncoding();
}
/// Turn a pointer encoding of a SourceLocation object back
@@ -230,13 +230,9 @@ class SourceRange {
bool isValid() const { return B.isValid() && E.isValid(); }
bool isInvalid() const { return !isValid(); }
- bool operator==(const SourceRange &X) const {
- return B == X.B && E == X.E;
- }
+ bool operator==(const SourceRange &X) const { return B == X.B && E == X.E; }
- bool operator!=(const SourceRange &X) const {
- return B != X.B || E != X.E;
- }
+ bool operator!=(const SourceRange &X) const { return B != X.B || E != X.E; }
// Returns true iff other is wholly contained within this range.
bool fullyContains(const SourceRange &other) const {
@@ -446,7 +442,7 @@ class FullSourceLoc : public SourceLocation {
/// Comparison function class, useful for sorting FullSourceLocs.
struct BeforeThanCompare {
- bool operator()(const FullSourceLoc& lhs, const FullSourceLoc& rhs) const {
+ bool operator()(const FullSourceLoc &lhs, const FullSourceLoc &rhs) const {
return lhs.isBeforeInTranslationUnitThan(rhs);
}
};
@@ -456,14 +452,12 @@ class FullSourceLoc : public SourceLocation {
/// This is useful for debugging.
void dump() const;
- friend bool
- operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
+ friend bool operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
return LHS.getRawEncoding() == RHS.getRawEncoding() &&
- LHS.SrcMgr == RHS.SrcMgr;
+ LHS.SrcMgr == RHS.SrcMgr;
}
- friend bool
- operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
+ friend bool operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) {
return !(LHS == RHS);
}
};
@@ -472,73 +466,68 @@ class FullSourceLoc : public SourceLocation {
namespace llvm {
- /// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and
- /// DenseSets.
- template <>
- struct DenseMapInfo<clang::FileID, void> {
- static clang::FileID getEmptyKey() {
- return {};
- }
+/// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and
+/// DenseSets.
+template <> struct DenseMapInfo<clang::FileID, void> {
+ static clang::FileID getEmptyKey() { return {}; }
- static clang::FileID getTombstoneKey() {
- return clang::FileID::getSentinel();
- }
+ static clang::FileID getTombstoneKey() {
+ return clang::FileID::getSentinel();
+ }
- static unsigned getHashValue(clang::FileID S) {
- return S.getHashValue();
- }
+ static unsigned getHashValue(clang::FileID S) { return S.getHashValue(); }
- static bool isEqual(clang::FileID LHS, clang::FileID RHS) {
- return LHS == RHS;
- }
- };
+ static bool isEqual(clang::FileID LHS, clang::FileID RHS) {
+ return LHS == RHS;
+ }
+};
- /// Define DenseMapInfo so that SourceLocation's can be used as keys in
- /// DenseMap and DenseSet. This trait class is eqivalent to
- /// DenseMapInfo<unsigned> which uses SourceLocation::ID is used as a key.
- template <> struct DenseMapInfo<clang::SourceLocation, void> {
- static clang::SourceLocation getEmptyKey() {
- constexpr clang::SourceLocation::UIntTy Zero = 0;
- return clang::SourceLocation::getFromRawEncoding(~Zero);
- }
+/// Define DenseMapInfo so that SourceLocation's can be used as keys in
+/// DenseMap and DenseSet. This trait class is equivalent to
+/// DenseMapInfo<unsigned>, with SourceLocation::ID used as the key.
+template <> struct DenseMapInfo<clang::SourceLocation, void> {
+ static clang::SourceLocation getEmptyKey() {
+ constexpr clang::SourceLocation::UIntTy Zero = 0;
+ return clang::SourceLocation::getFromRawEncoding(~Zero);
+ }
- static clang::SourceLocation getTombstoneKey() {
- constexpr clang::SourceLocation::UIntTy Zero = 0;
- return clang::SourceLocation::getFromRawEncoding(~Zero - 1);
- }
+ static clang::SourceLocation getTombstoneKey() {
+ constexpr clang::SourceLocation::UIntTy Zero = 0;
+ return clang::SourceLocation::getFromRawEncoding(~Zero - 1);
+ }
- static unsigned getHashValue(clang::SourceLocation Loc) {
- return Loc.getHashValue();
- }
+ static unsigned getHashValue(clang::SourceLocation Loc) {
+ return Loc.getHashValue();
+ }
- static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) {
- return LHS == RHS;
- }
- };
+ static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) {
+ return LHS == RHS;
+ }
+};
- // Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter
- template <> struct FoldingSetTrait<clang::SourceLocation, void> {
- static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID);
- };
+// Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter
+template <> struct FoldingSetTrait<clang::SourceLocation, void> {
+ static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID);
+};
- template <> struct DenseMapInfo<clang::SourceRange> {
- static clang::SourceRange getEmptyKey() {
- return DenseMapInfo<clang::SourceLocation>::getEmptyKey();
- }
+template <> struct DenseMapInfo<clang::SourceRange> {
+ static clang::SourceRange getEmptyKey() {
+ return DenseMapInfo<clang::SourceLocation>::getEmptyKey();
+ }
- static clang::SourceRange getTombstoneKey() {
- return DenseMapInfo<clang::SourceLocation>::getTombstoneKey();
- }
+ static clang::SourceRange getTombstoneKey() {
+ return DenseMapInfo<clang::SourceLocation>::getTombstoneKey();
+ }
- static unsigned getHashValue(clang::SourceRange Range) {
- return detail::combineHashValue(Range.getBegin().getHashValue(),
- Range.getEnd().getHashValue());
- }
+ static unsigned getHashValue(clang::SourceRange Range) {
+ return detail::combineHashValue(Range.getBegin().getHashValue(),
+ Range.getEnd().getHashValue());
+ }
- static bool isEqual(clang::SourceRange LHS, clang::SourceRange RHS) {
- return LHS == RHS;
- }
- };
+ static bool isEqual(clang::SourceRange LHS, clang::SourceRange RHS) {
+ return LHS == RHS;
+ }
+};
} // namespace llvm
>From 645ac26db1c0273b34901344d46ac552fa5e0cb8 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <mail at ribizel.de>
Date: Thu, 22 Jan 2026 17:42:14 +0100
Subject: [PATCH 2/4] [clang] improve documentation for SourceLocation and
(Char)SourceRange
Clarify that SourceLocation always operates on a character level
(even when referring to tokens) and that SourceRange is inclusive,
while CharSourceRange is usually exclusive.
---
clang/include/clang/Basic/SourceLocation.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index aab021ad7b610..69fc69ebb946c 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -86,6 +86,10 @@ using FileIDAndOffset = std::pair<FileID, unsigned>;
/// In addition, one bit of SourceLocation is used for quick access to the
/// information whether the location is in a file or a macro expansion.
///
+/// SourceLocation operates on a character level, i.e. offsets describe
+/// character distances, but in most cases, they are used on a token level,
+/// where a SourceLocation points to the first character of a lexer token.
+///
/// It is important that this type remains small. It is currently 32 bits wide.
class SourceLocation {
friend class ASTReader;
@@ -212,6 +216,11 @@ inline bool operator>=(const SourceLocation &LHS, const SourceLocation &RHS) {
}
/// A trivial tuple used to represent a source range.
+///
+/// SourceRange is an inclusive range [begin, end] that contains its endpoints,
+/// and when referring to tokens, its begin SourceLocation usually points to
+/// the first character of the first token and its end SourceLocation points to
+/// the last character of the last token.
class SourceRange {
SourceLocation B;
SourceLocation E;
@@ -251,6 +260,15 @@ class SourceRange {
/// last token of the range (a "token range"). In the token range case, the
/// size of the last token must be measured to determine the actual end of the
/// range.
+///
+/// CharSourceRange is interpreted differently depending on whether it is a
+/// TokenRange or a CharRange.
+/// For a TokenRange, the range contains the endpoint, i.e. the token containing
+/// the end SourceLocation.
+/// For a CharRange, the range doesn't contain the endpoint, i.e. it ends at the
+/// character before the end SourceLocation. This allows representing a point
+/// CharRange [begin, begin) that points at the empty range right in front of
+/// the begin SourceLocation.
class CharSourceRange {
SourceRange Range;
bool IsTokenRange = false;
>From 0bad98f01fa54c21aea5cffd4640f4a3c9425923 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <mail at ribizel.de>
Date: Tue, 3 Feb 2026 19:43:36 +0100
Subject: [PATCH 3/4] clarify source location of end tokens
---
clang/include/clang/Basic/SourceLocation.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 69fc69ebb946c..0afbeb0f9ba93 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -217,10 +217,10 @@ inline bool operator>=(const SourceLocation &LHS, const SourceLocation &RHS) {
/// A trivial tuple used to represent a source range.
///
-/// SourceRange is an inclusive range [begin, end] that contains its endpoints,
-/// and when referring to tokens, its begin SourceLocation usually points to
-/// the first character of the first token and its end SourceLocation points to
-/// the last character of the last token.
+/// When referring to tokens, a SourceRange is an inclusive range [begin, end]
+/// that contains its endpoints, its begin SourceLocation points to the first
+/// character of the first token and its end SourceLocation points to the first
+/// character of the last token.
class SourceRange {
SourceLocation B;
SourceLocation E;
>From 63daf12460949de82f80263bf20eedb274d8fffb Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <mail at ribizel.de>
Date: Tue, 3 Feb 2026 20:45:52 +0100
Subject: [PATCH 4/4] clarify distinction between bytes and characters
---
clang/include/clang/Basic/SourceLocation.h | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 0afbeb0f9ba93..51afe7a083367 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -86,9 +86,9 @@ using FileIDAndOffset = std::pair<FileID, unsigned>;
/// In addition, one bit of SourceLocation is used for quick access to the
/// information whether the location is in a file or a macro expansion.
///
-/// SourceLocation operates on a character level, i.e. offsets describe
-/// character distances, but in most cases, they are used on a token level,
-/// where a SourceLocation points to the first character of a lexer token.
+/// SourceLocation operates on a byte level, i.e. offsets describe byte
+/// distances. In most cases, however, SourceLocations are used on a token
+/// level, where a SourceLocation points to the first byte of a lexer token.
///
/// It is important that this type remains small. It is currently 32 bits wide.
class SourceLocation {
@@ -219,8 +219,8 @@ inline bool operator>=(const SourceLocation &LHS, const SourceLocation &RHS) {
///
/// When referring to tokens, a SourceRange is an inclusive range [begin, end]
/// that contains its endpoints, its begin SourceLocation points to the first
-/// character of the first token and its end SourceLocation points to the first
-/// character of the last token.
+/// byte of the first token and its end SourceLocation points to the first byte
+/// of the last token.
class SourceRange {
SourceLocation B;
SourceLocation E;
@@ -253,9 +253,9 @@ class SourceRange {
void dump(const SourceManager &SM) const;
};
-/// Represents a character-granular source range.
+/// Represents a byte-granular source range.
///
-/// The underlying SourceRange can either specify the starting/ending character
+/// The underlying SourceRange can either specify the starting/ending byte
/// of the range, or it can specify the start of the range and the start of the
/// last token of the range (a "token range"). In the token range case, the
/// size of the last token must be measured to determine the actual end of the
@@ -266,7 +266,7 @@ class SourceRange {
/// For a TokenRange, the range contains the endpoint, i.e. the token containing
/// the end SourceLocation.
/// For a CharRange, the range doesn't contain the endpoint, i.e. it ends at the
-/// character before the end SourceLocation. This allows representing a point
+/// byte before the end SourceLocation. This allows representing a point
/// CharRange [begin, begin) that points at the empty range right in front of
/// the begin SourceLocation.
class CharSourceRange {
@@ -294,8 +294,8 @@ class CharSourceRange {
}
/// Return true if the end of this range specifies the start of
- /// the last token. Return false if the end of this range specifies the last
- /// character in the range.
+ /// the last token. Return false if the end of this range specifies the first
+ /// byte after the range.
bool isTokenRange() const { return IsTokenRange; }
bool isCharRange() const { return !IsTokenRange; }
More information about the cfe-commits
mailing list