[clang] [clang-format] Add functionality of getting info about numeric literals (PR #152878)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Aug 10 23:43:39 PDT 2025
https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/152878
>From 5f1ea996c590c07ab2da923ad1be1fbf7b7ccc8b Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Thu, 7 Aug 2025 23:18:03 -0700
Subject: [PATCH 1/2] [clang-format] Add functionality of getting info about
numeric literals
---
clang/lib/Format/CMakeLists.txt | 1 +
clang/lib/Format/NumericLiteralInfo.cpp | 65 +++++++++++++++++
clang/lib/Format/NumericLiteralInfo.h | 29 ++++++++
clang/unittests/Format/CMakeLists.txt | 1 +
.../Format/NumericLiteralInfoTest.cpp | 71 +++++++++++++++++++
5 files changed, 167 insertions(+)
create mode 100644 clang/lib/Format/NumericLiteralInfo.cpp
create mode 100644 clang/lib/Format/NumericLiteralInfo.h
create mode 100644 clang/unittests/Format/NumericLiteralInfoTest.cpp
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 9f4939824fdb8..24f435d2caee1 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangFormat
MacroExpander.cpp
MatchFilePath.cpp
NamespaceEndCommentsFixer.cpp
+ NumericLiteralInfo.cpp
ObjCPropertyAttributeOrderFixer.cpp
QualifierAlignmentFixer.cpp
SortJavaScriptImports.cpp
diff --git a/clang/lib/Format/NumericLiteralInfo.cpp b/clang/lib/Format/NumericLiteralInfo.cpp
new file mode 100644
index 0000000000000..30284fbe0c51f
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralInfo.cpp
@@ -0,0 +1,65 @@
+//===--- NumericLiteralInfo.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of getting information about a
+/// numeric literal string, including 0-based positions of the base letter, the
+/// decimal/hexadecimal point, the exponent letter, and the suffix, or npos if
+/// absent.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralInfo.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace clang {
+namespace format {
+
+using namespace llvm;
+
+NumericLiteralInfo::NumericLiteralInfo(StringRef Text, char Separator) {
+ if (Text.size() < 2)
+ return;
+
+ bool IsHex = false;
+ if (Text[0] == '0') {
+ switch (Text[1]) {
+ case 'x':
+ case 'X':
+ IsHex = true;
+ [[fallthrough]];
+ case 'b':
+ case 'B':
+ case 'o':
+ case 'O':
+ BaseLetterPos = 1; // e.g. 0xF
+ break;
+ }
+ }
+
+ DotPos = Text.find('.', BaseLetterPos == 1 ? 2 : 0); // e.g. 0x.1 or .1
+
+ // e.g. 1.e2 or 0xFp2
+ const auto Pos = DotPos != StringRef::npos ? DotPos + 1 : BaseLetterPos + 2;
+
+ ExponentLetterPos =
+ // Trim C++ user-defined suffix as in `1_Pa`.
+ (Separator == '\'' ? Text.substr(0, Text.find('_')) : Text)
+ .find_insensitive(IsHex ? 'p' : 'e', Pos);
+
+ const bool HasExponent = ExponentLetterPos != StringRef::npos;
+ SuffixPos = Text.find_if_not(
+ [&](char C) {
+ return (HasExponent || !IsHex ? isDigit : isHexDigit)(C) ||
+ C == Separator;
+ },
+ HasExponent ? ExponentLetterPos + 2 : Pos); // e.g. 1e-2f
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/NumericLiteralInfo.h b/clang/lib/Format/NumericLiteralInfo.h
new file mode 100644
index 0000000000000..0210f6c94d3e7
--- /dev/null
+++ b/clang/lib/Format/NumericLiteralInfo.h
@@ -0,0 +1,29 @@
+//===--- NumericLiteralInfo.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_NUMERICLITERALINFO_H
+#define LLVM_CLANG_LIB_FORMAT_NUMERICLITERALINFO_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+struct NumericLiteralInfo {
+ size_t BaseLetterPos = llvm::StringRef::npos; // as in 0b1, 0xF, etc.
+ size_t DotPos = llvm::StringRef::npos; // pos of decimal/hex point
+ size_t ExponentLetterPos = llvm::StringRef::npos; // as in 9e9 and 0xFp9
+ size_t SuffixPos = llvm::StringRef::npos; // starting pos of suffix
+
+ NumericLiteralInfo(llvm::StringRef Text, char Separator = '\'');
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index edfc8d7a5beaa..c4c7b483ba68e 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_distinct_clang_unittest(FormatTests
MacroExpanderTest.cpp
MatchFilePathTest.cpp
NamespaceEndCommentsFixerTest.cpp
+ NumericLiteralInfoTest.cpp
ObjCPropertyAttributeOrderFixerTest.cpp
QualifierFixerTest.cpp
SortImportsTestJS.cpp
diff --git a/clang/unittests/Format/NumericLiteralInfoTest.cpp b/clang/unittests/Format/NumericLiteralInfoTest.cpp
new file mode 100644
index 0000000000000..a892cfff531e3
--- /dev/null
+++ b/clang/unittests/Format/NumericLiteralInfoTest.cpp
@@ -0,0 +1,71 @@
+//===- unittest/Format/NumericLiteralInfoTest.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/NumericLiteralInfo.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+static constexpr auto npos = llvm::StringRef::npos;
+
+class NumericLiteralInfoTest : public testing::Test {
+protected:
+ bool verifyInfo(const NumericLiteralInfo &Info, size_t BaseLetterPos = npos,
+ size_t DotPos = npos, size_t ExponentLetterPos = npos,
+ size_t SuffixPos = npos) {
+ return Info.BaseLetterPos == BaseLetterPos && Info.DotPos == DotPos &&
+ Info.ExponentLetterPos == ExponentLetterPos &&
+ Info.SuffixPos == SuffixPos;
+ }
+};
+
+TEST_F(NumericLiteralInfoTest, IntegerLiteral) {
+ // Decimal.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("90")));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9L"), npos, npos, npos, 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9'0U"), npos, npos, npos, 3));
+
+ // Octal.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0")));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("07")));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0z"), npos, npos, npos, 1));
+ // JavaScript.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0o7"), 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0O7_0", '_'), 1));
+
+ // Binary.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0b1"), 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0B1ul"), 1, npos, npos, 3));
+
+ // Hexadecimal.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0xF"), 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0XfZ"), 1, npos, npos, 3));
+}
+
+TEST_F(NumericLiteralInfoTest, FloatingPointLiteral) {
+ // Decimal.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo(".9"), npos, 0));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9."), npos, 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9.F"), npos, 1, npos, 2));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9e9"), npos, npos, 1));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9E-9f"), npos, npos, 1, 4));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("9.9e+9bf16"), npos, 1, 3, 6));
+
+ // Hexadecimal.
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0X.Fp9"), 1, 2, 4));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0xF.P9"), 1, 3, 4));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0xFp9"), 1, npos, 3));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0xFp+9F128"), 1, npos, 3, 6));
+ EXPECT_TRUE(verifyInfo(NumericLiteralInfo("0xF.Fp-9_Pa"), 1, 3, 5, 8));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
>From 3afb0f860b3cfc0f383279d5299004f389f75bb7 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Sun, 10 Aug 2025 23:43:05 -0700
Subject: [PATCH 2/2] nfc: minor simplifications
---
clang/lib/Format/NumericLiteralInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Format/NumericLiteralInfo.cpp b/clang/lib/Format/NumericLiteralInfo.cpp
index 30284fbe0c51f..b1c308b732550 100644
--- a/clang/lib/Format/NumericLiteralInfo.cpp
+++ b/clang/lib/Format/NumericLiteralInfo.cpp
@@ -42,14 +42,14 @@ NumericLiteralInfo::NumericLiteralInfo(StringRef Text, char Separator) {
}
}

- DotPos = Text.find('.', BaseLetterPos == 1 ? 2 : 0); // e.g. 0x.1 or .1
+ DotPos = Text.find('.', BaseLetterPos + 1); // e.g. 0x.1 or .1

// e.g. 1.e2 or 0xFp2
const auto Pos = DotPos != StringRef::npos ? DotPos + 1 : BaseLetterPos + 2;

ExponentLetterPos =
// Trim C++ user-defined suffix as in `1_Pa`.
- (Separator == '\'' ? Text.substr(0, Text.find('_')) : Text)
+ (Separator == '\'' ? Text.take_front(Text.find('_')) : Text)
.find_insensitive(IsHex ? 'p' : 'e', Pos);

const bool HasExponent = ExponentLetterPos != StringRef::npos;
More information about the cfe-commits mailing list