[clang] [clang-format] Add an option to format integer and float literal case (PR #151590)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Aug 30 22:59:51 PDT 2025
================
@@ -0,0 +1,176 @@
+//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements NumericLiteralCaseFixer that standardizes character
+/// case within numeric literals.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralCaseFixer.h"
+#include "NumericLiteralInfo.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+/// Decide whether the numeric literal case pass has work to do for \p Style.
+///
+/// \returns true only when the language is one of those listed below and at
+/// least one NumericLiteralCase component is set to something other than
+/// NLCS_Leave.
+static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
+  // Any language outside this list is never touched.
+  const auto Lang = Style.Language;
+  const bool IsSupportedLanguage =
+      Lang == FormatStyle::LK_C || Lang == FormatStyle::LK_Cpp ||
+      Lang == FormatStyle::LK_ObjC || Lang == FormatStyle::LK_CSharp ||
+      Lang == FormatStyle::LK_Java || Lang == FormatStyle::LK_JavaScript;
+  if (!IsSupportedLanguage)
+    return false;
+
+  // Nothing to rewrite when every component keeps its original case.
+  const auto &Case = Style.NumericLiteralCase;
+  const bool EverythingLeftAlone = Case.Prefix == FormatStyle::NLCS_Leave &&
+                                   Case.HexDigit == FormatStyle::NLCS_Leave &&
+                                   Case.ExponentLetter ==
+                                       FormatStyle::NLCS_Leave &&
+                                   Case.Suffix == FormatStyle::NLCS_Leave;
+  return !EverythingLeftAlone;
+}
+
+/// Return a copy of \p Component with its case adjusted per \p ConfigValue.
+///
+/// NLCS_Upper yields the upper-cased text and NLCS_Lower the lower-cased
+/// text; NLCS_Leave and any other value return the text unchanged.
+static std::string
+transformComponent(StringRef Component,
+                   FormatStyle::NumericLiteralComponentStyle ConfigValue) {
+  if (ConfigValue == FormatStyle::NLCS_Upper)
+    return Component.upper();
+  if (ConfigValue == FormatStyle::NLCS_Lower)
+    return Component.lower();
+  // NLCS_Leave, plus anything unrecognized: hand back an unmodified copy.
+  return Component.str();
+}
+
+/// Test if \p Suffix matches a C++ literal suffix reserved by the library.
+/// Matches against all suffixes reserved in the C++23 standard.
+///
+/// \returns true when \p Suffix is exactly equal (case-sensitively) to one of
+/// the entries in the table below.
+static bool matchesReservedSuffix(StringRef Suffix) {
+  static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
+      "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y"};
+
+  // This can be a static_assert once a constexpr is_sorted is available
+  // (C++20).
+  assert(llvm::is_sorted(SortedReservedSuffixes) &&
+         "Must be sorted as precondition for binary_search().");
+
+  // The table is sorted, so an exact-match lookup is a plain binary search.
+  return llvm::binary_search(SortedReservedSuffixes, Suffix);
+}
+
+/// Rebuild \p NumericLiteral with each component's case set per
+/// Style.NumericLiteralCase.
+///
+/// The literal is split at the positions reported by NumericLiteralInfo into
+/// an optional base prefix, the digits, an optional exponent part, and an
+/// optional suffix. Each piece is transformed independently and the pieces
+/// are concatenated in their original order.
+static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
+  const char Separator = Style.isCpp() ? '\'' : '_';
+  const NumericLiteralInfo Info(NumericLiteral, Separator);
+  const auto &Case = Style.NumericLiteralCase;
+
+  const bool HasBaseLetter = Info.BaseLetterPos != llvm::StringRef::npos;
+  const bool HasExponent = Info.ExponentLetterPos != llvm::StringRef::npos;
+  const bool HasSuffix = Info.SuffixPos != llvm::StringRef::npos;
+
+  // Boundaries of the pieces; a missing piece collapses onto its neighbor.
+  const size_t DigitsBegin = HasBaseLetter ? 1 + Info.BaseLetterPos : 0;
+  const size_t SuffixBegin =
+      HasSuffix ? Info.SuffixPos : NumericLiteral.size();
+  const size_t DigitsEnd = HasExponent ? Info.ExponentLetterPos : SuffixBegin;
+
+  std::string Formatted;
+
+  // Base prefix, up to and including the base letter.
+  if (HasBaseLetter) {
+    Formatted +=
+        transformComponent(NumericLiteral.take_front(DigitsBegin), Case.Prefix);
+  }
+
+  // The digits are always run through the HexDigit setting: a case change
+  // leaves binary/decimal/octal digits untouched, so no base check is needed.
+  Formatted += transformComponent(NumericLiteral.slice(DigitsBegin, DigitsEnd),
+                                  Case.HexDigit);
+
+  // Exponent part: from the exponent letter up to the suffix or the end.
+  if (HasExponent) {
+    Formatted += transformComponent(
+        NumericLiteral.slice(Info.ExponentLetterPos, SuffixBegin),
+        Case.ExponentLetter);
+  }
+
+  if (HasSuffix) {
+    const StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
+    // In C++, it is idiomatic, but NOT standardized, to define user-defined
+    // literals with a leading '_'. Such suffixes and the standard reserved
+    // suffixes are copied through without any case transformation.
+    const bool KeepVerbatim =
+        matchesReservedSuffix(Suffix) || Suffix.front() == '_';
+    Formatted +=
+        KeepVerbatim ? Suffix.str() : transformComponent(Suffix, Case.Suffix);
+  }
+
+  return Formatted;
+}
+
+std::pair<tooling::Replacements, unsigned>
+NumericLiteralCaseFixer::process(const Environment &Env,
+ const FormatStyle &Style) {
+ if (!isNumericLiteralCaseFixerNeeded(Style))
+ return {};
+
+ const auto &SourceMgr = Env.getSourceManager();
+ AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
+
+ const auto ID = Env.getFileID();
+ const auto LangOpts = getFormattingLangOpts(Style);
+ Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+ Lex.SetCommentRetentionState(true);
+
+ Token Tok;
+ tooling::Replacements Result;
+
+ for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
+ // Skip tokens that are too small to contain a formattable literal.
+ // Size=2 is the smallest possible literal that could contain formattable
+ // components, for example "1u".
+ auto Length = Tok.getLength();
+ if (Length < 2)
+ continue;
+
+ // Service clang-format off/on comments.
+ auto Location = Tok.getLocation();
+ auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+ if (Tok.is(tok::comment)) {
+ if (isClangFormatOff(Text))
+ Skip = true;
+ else if (isClangFormatOn(Text))
+ Skip = false;
+ continue;
+ }
+
+ if (Skip || Tok.isNot(tok::numeric_constant) ||
+ !AffectedRangeMgr.affectsCharSourceRange(
+ CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
+ continue;
+ }
+
+ const auto Formatted = format(Text, Style);
----------------
owenca wrote:
> That wouldn't work, because it is needed for the comparison.
That was why I had asked. 😉 Anyway, because it seems that copy elision is guaranteed (since C++17), my suggestion was no good.
https://github.com/llvm/llvm-project/pull/151590
More information about the cfe-commits
mailing list