[clang] 89aad1e - Reland [clang-format] Add an option to format integer literal separators

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Sat Dec 31 17:57:40 PST 2022


Author: Owen Pan
Date: 2022-12-31T17:57:33-08:00
New Revision: 89aad1e6a397447f9574bb088f4de1d9044b5812

URL: https://github.com/llvm/llvm-project/commit/89aad1e6a397447f9574bb088f4de1d9044b5812
DIFF: https://github.com/llvm/llvm-project/commit/89aad1e6a397447f9574bb088f4de1d9044b5812.diff

LOG: Reland [clang-format] Add an option to format integer literal separators

Previously committed in 46c94e5067b5 which was reverted in f0756e086010
due to a memory bug.

Closes #58949.

Differential Revision: https://reviews.llvm.org/D140543

Added: 
    clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
    clang/lib/Format/IntegerLiteralSeparatorFixer.h
    clang/unittests/Format/IntegerLiteralSeparatorTest.cpp

Modified: 
    clang/docs/ClangFormatStyleOptions.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Format/Format.h
    clang/lib/Format/CMakeLists.txt
    clang/lib/Format/Format.cpp
    clang/unittests/Format/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index cac0afe0bffcc..989c91e9a3efe 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3159,6 +3159,37 @@ the configuration (without a prefix: ``Auto``).
 
 
 
+**IntegerLiteralSeparator** (``IntegerLiteralSeparatorStyle``) :versionbadge:`clang-format 16`
+  Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
+  and JavaScript).
+
+  Nested configuration flags:
+
+  Separator format of integer literals of different bases.
+  <0: Remove separators.
+   0: Leave the literal as is.
+  >0: Insert separators between digits, starting from the rightmost digit.
+
+  * ``int8_t Binary`` .. code-block:: c++
+
+       -1: 0b100111101101
+        0: 0b10011'11'0110'1
+        3: 0b100'111'101'101
+        4: 0b1001'1110'1101
+
+  * ``int8_t Decimal`` .. code-block:: c++
+
+       -1: 18446744073709550592ull
+        0: 184467'440737'0'95505'92ull
+        3: 18'446'744'073'709'550'592ull
+
+  * ``int8_t Hex`` .. code-block:: c++
+
+       -1: 0xDEADBEEFDEADBEEFuz
+        0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
+        2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
+
+
 **JavaImportGroups** (``List of Strings``) :versionbadge:`clang-format 8`
   A vector of prefixes ordered by the desired groups for Java imports.
 

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 776f0a97d2500..a06f409d78d97 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -867,6 +867,8 @@ clang-format
 - Add ``RequiresExpressionIndentation`` option for configuring the alignment of requires-expressions.
   The default value of this option is ``OuterScope``, which differs in behavior from clang-format 15.
   To match the default behavior of clang-format 15, use the ``Keyword`` value.
+- Add ``IntegerLiteralSeparator`` option for fixing integer literal separators
+  in C++, C#, Java, and JavaScript.
 
 clang-extdef-mapping
 --------------------

diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 8949520f87b01..9162028c53ede 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2450,6 +2450,37 @@ struct FormatStyle {
   /// \version 11
   TrailingCommaStyle InsertTrailingCommas;
 
+  /// Separator format of integer literals of different bases.
+  /// <0: Remove separators.
+  ///  0: Leave the literal as is.
+  /// >0: Insert separators between digits, starting from the rightmost digit.
+  struct IntegerLiteralSeparatorStyle {
+    /// \code
+    ///    -1: 0b100111101101
+    ///     0: 0b10011'11'0110'1
+    ///     3: 0b100'111'101'101
+    ///     4: 0b1001'1110'1101
+    /// \endcode
+    int8_t Binary;
+    /// \code
+    ///    -1: 18446744073709550592ull
+    ///     0: 184467'440737'0'95505'92ull
+    ///     3: 18'446'744'073'709'550'592ull
+    /// \endcode
+    int8_t Decimal;
+    /// \code
+    ///    -1: 0xDEADBEEFDEADBEEFuz
+    ///     0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
+    ///     2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
+    /// \endcode
+    int8_t Hex;
+  };
+
+  /// Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
+  /// and JavaScript).
+  /// \version 16
+  IntegerLiteralSeparatorStyle IntegerLiteralSeparator;
+
   /// A vector of prefixes ordered by the desired groups for Java imports.
   ///
   /// One group's prefix can be a subset of another - the longest prefix is
@@ -4089,6 +4120,10 @@ struct FormatStyle {
            IndentWidth == R.IndentWidth &&
            IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
            InsertBraces == R.InsertBraces &&
+           IntegerLiteralSeparator.Binary == R.IntegerLiteralSeparator.Binary &&
+           IntegerLiteralSeparator.Decimal ==
+               R.IntegerLiteralSeparator.Decimal &&
+           IntegerLiteralSeparator.Hex == R.IntegerLiteralSeparator.Hex &&
            JavaImportGroups == R.JavaImportGroups &&
            JavaScriptQuotes == R.JavaScriptQuotes &&
            JavaScriptWrapImports == R.JavaScriptWrapImports &&

diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 4ea02ea72bc77..3d3f3c6bf22e7 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
   NamespaceEndCommentsFixer.cpp

diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 77f6892e5d09a..3304aacff070a 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -20,6 +20,7 @@
 #include "FormatInternal.h"
 #include "FormatToken.h"
 #include "FormatTokenLexer.h"
+#include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "QualifierAlignmentFixer.h"
 #include "SortJavaScriptImports.h"
@@ -335,6 +336,14 @@ struct ScalarEnumerationTraits<FormatStyle::IndentExternBlockStyle> {
   }
 };
 
+template <> struct MappingTraits<FormatStyle::IntegerLiteralSeparatorStyle> {
+  static void mapping(IO &IO, FormatStyle::IntegerLiteralSeparatorStyle &Base) {
+    IO.mapOptional("Binary", Base.Binary);
+    IO.mapOptional("Decimal", Base.Decimal);
+    IO.mapOptional("Hex", Base.Hex);
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
@@ -881,6 +890,7 @@ template <> struct MappingTraits<FormatStyle> {
                    Style.IndentWrappedFunctionNames);
     IO.mapOptional("InsertBraces", Style.InsertBraces);
     IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas);
+    IO.mapOptional("IntegerLiteralSeparator", Style.IntegerLiteralSeparator);
     IO.mapOptional("JavaImportGroups", Style.JavaImportGroups);
     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
     IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
@@ -1335,6 +1345,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.IndentWrappedFunctionNames = false;
   LLVMStyle.InsertBraces = false;
   LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None;
+  LLVMStyle.IntegerLiteralSeparator = {/*Binary=*/0, /*Decimal=*/0, /*Hex=*/0};
   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
   LLVMStyle.JavaScriptWrapImports = true;
   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
@@ -3391,6 +3402,10 @@ reformat(const FormatStyle &Style, StringRef Code,
       AnalyzerPass;
   SmallVector<AnalyzerPass, 8> Passes;
 
+  Passes.emplace_back([&](const Environment &Env) {
+    return IntegerLiteralSeparatorFixer().process(Env, Expanded);
+  });
+
   if (Style.isCpp()) {
     if (Style.QualifierAlignment != FormatStyle::QAS_Leave) {
       Passes.emplace_back([&](const Environment &Env) {

diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
new file mode 100644
index 0000000000000..4082a1cf8385c
--- /dev/null
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -0,0 +1,200 @@
+//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
+/// literal separators.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IntegerLiteralSeparatorFixer.h"
+
+namespace clang {
+namespace format {
+
+enum class Base { Binary, Decimal, Hex, Other };
+
+static Base getBase(const StringRef IntegerLiteral) {
+  assert(IntegerLiteral.size() > 1);
+
+  if (IntegerLiteral[0] > '0') {
+    assert(IntegerLiteral[0] <= '9');
+    return Base::Decimal;
+  }
+
+  assert(IntegerLiteral[0] == '0');
+
+  switch (IntegerLiteral[1]) {
+  case 'b':
+  case 'B':
+    return Base::Binary;
+  case 'x':
+  case 'X':
+    return Base::Hex;
+  default:
+    return Base::Other;
+  }
+}
+
+std::pair<tooling::Replacements, unsigned>
+IntegerLiteralSeparatorFixer::process(const Environment &Env,
+                                      const FormatStyle &Style) {
+  switch (Style.Language) {
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+    Separator = '\'';
+    break;
+  case FormatStyle::LK_CSharp:
+  case FormatStyle::LK_Java:
+  case FormatStyle::LK_JavaScript:
+    Separator = '_';
+    break;
+  default:
+    return {};
+  }
+
+  const auto &Option = Style.IntegerLiteralSeparator;
+  const auto Binary = Option.Binary;
+  const auto Decimal = Option.Decimal;
+  const auto Hex = Option.Hex;
+  const bool SkipBinary = Binary == 0;
+  const bool SkipDecimal = Decimal == 0;
+  const bool SkipHex = Hex == 0;
+
+  if (SkipBinary && SkipDecimal && SkipHex)
+    return {};
+
+  const auto &SourceMgr = Env.getSourceManager();
+  AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
+
+  const auto ID = Env.getFileID();
+  const auto LangOpts = getFormattingLangOpts(Style);
+  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+  Lex.SetCommentRetentionState(true);
+
+  Token Tok;
+  Lex.LexFromRawLexer(Tok);
+
+  tooling::Replacements Result;
+  for (bool Skip = false; Tok.isNot(tok::eof); Lex.LexFromRawLexer(Tok)) {
+    auto Length = Tok.getLength();
+    if (Length < 2)
+      continue;
+    auto Location = Tok.getLocation();
+    auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+    if (Tok.is(tok::comment)) {
+      if (Text == "// clang-format off" || Text == "/* clang-format off */")
+        Skip = true;
+      else if (Text == "// clang-format on" || Text == "/* clang-format on */")
+        Skip = false;
+      continue;
+    }
+    if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
+        !AffectedRangeMgr.affectsCharSourceRange(
+            CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
+      continue;
+    }
+    const auto B = getBase(Text);
+    const bool IsBase2 = B == Base::Binary;
+    const bool IsBase10 = B == Base::Decimal;
+    const bool IsBase16 = B == Base::Hex;
+    if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
+        (IsBase16 && SkipHex) || B == Base::Other) {
+      continue;
+    }
+    if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
+        (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
+      continue;
+    }
+    if (((IsBase2 && Binary < 0) || (IsBase10 && Decimal < 0) ||
+         (IsBase16 && Hex < 0)) &&
+        Text.find(Separator) == StringRef::npos) {
+      continue;
+    }
+    const auto Start = Text[0] == '0' ? 2 : 0;
+    auto End = Text.find_first_of("uUlLzZn");
+    if (End == StringRef::npos)
+      End = Length;
+    if (Start > 0 || End < Length) {
+      Length = End - Start;
+      Text = Text.substr(Start, Length);
+    }
+    auto DigitsPerGroup = Decimal;
+    if (IsBase2)
+      DigitsPerGroup = Binary;
+    else if (IsBase16)
+      DigitsPerGroup = Hex;
+    if (DigitsPerGroup > 0 && checkSeparator(Text, DigitsPerGroup))
+      continue;
+    if (Start > 0)
+      Location = Location.getLocWithOffset(Start);
+    cantFail(Result.add(tooling::Replacement(SourceMgr, Location, Length,
+                                             format(Text, DigitsPerGroup))));
+  }
+
+  return {Result, 0};
+}
+
+bool IntegerLiteralSeparatorFixer::checkSeparator(
+    const StringRef IntegerLiteral, int DigitsPerGroup) const {
+  assert(DigitsPerGroup > 0);
+
+  int I = 0;
+  for (auto C : llvm::reverse(IntegerLiteral)) {
+    if (C == Separator) {
+      if (I < DigitsPerGroup)
+        return false;
+      I = 0;
+    } else {
+      ++I;
+      if (I == DigitsPerGroup)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
+                                                 int DigitsPerGroup) const {
+  assert(DigitsPerGroup != 0);
+
+  std::string Formatted;
+
+  if (DigitsPerGroup < 0) {
+    for (auto C : IntegerLiteral)
+      if (C != Separator)
+        Formatted.push_back(C);
+    return Formatted;
+  }
+
+  int DigitCount = 0;
+  for (auto C : IntegerLiteral)
+    if (C != Separator)
+      ++DigitCount;
+
+  int Remainder = DigitCount % DigitsPerGroup;
+
+  int I = 0;
+  for (auto C : IntegerLiteral) {
+    if (C == Separator)
+      continue;
+    if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
+      Formatted.push_back(Separator);
+      I = 0;
+      Remainder = 0;
+    }
+    Formatted.push_back(C);
+    ++I;
+  }
+
+  return Formatted;
+}
+
+} // namespace format
+} // namespace clang

diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.h b/clang/lib/Format/IntegerLiteralSeparatorFixer.h
new file mode 100644
index 0000000000000..156bf5c14fca1
--- /dev/null
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.h
@@ -0,0 +1,38 @@
+//===--- IntegerLiteralSeparatorFixer.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares IntegerLiteralSeparatorFixer that fixes C++ integer
+/// literal separators.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class IntegerLiteralSeparatorFixer {
+public:
+  std::pair<tooling::Replacements, unsigned> process(const Environment &Env,
+                                                     const FormatStyle &Style);
+
+private:
+  bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const;
+  std::string format(const StringRef IntegerLiteral, int DigitsPerGroup) const;
+
+  char Separator;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif

diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 26ecd355d531d..09457c105ca30 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -21,6 +21,7 @@ add_clang_unittest(FormatTests
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
   FormatTestVerilog.cpp
+  IntegerLiteralSeparatorTest.cpp
   MacroCallReconstructorTest.cpp
   MacroExpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp

diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
new file mode 100644
index 0000000000000..8698947818e5e
--- /dev/null
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -0,0 +1,228 @@
+//===- unittest/Format/IntegerLiteralSeparatorTest.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "../Tooling/ReplacementTest.h"
+#include "FormatTestUtils.h"
+
+#define DEBUG_TYPE "integer-literal-separator-test"
+
+namespace clang {
+namespace format {
+namespace {
+
+// TODO:
+// Refactor the class declaration, which is copied from BracesInserterTest.cpp.
+class IntegerLiteralSeparatorTest : public ::testing::Test {
+protected:
+  std::string format(llvm::StringRef Code, const FormatStyle &Style,
+                     const std::vector<tooling::Range> &Ranges) {
+    LLVM_DEBUG(llvm::errs() << "---\n");
+    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
+    auto NonEmptyRanges = Ranges;
+    if (Ranges.empty())
+      NonEmptyRanges = {1, tooling::Range(0, Code.size())};
+    FormattingAttemptStatus Status;
+    tooling::Replacements Replaces =
+        reformat(Style, Code, NonEmptyRanges, "<stdin>", &Status);
+    EXPECT_EQ(true, Status.FormatComplete) << Code << "\n\n";
+    ReplacementCount = Replaces.size();
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  void _verifyFormat(const char *File, int Line, llvm::StringRef Expected,
+                     llvm::StringRef Code,
+                     const FormatStyle &Style = getLLVMStyle(),
+                     const std::vector<tooling::Range> &Ranges = {}) {
+    testing::ScopedTrace t(File, Line, ::testing::Message() << Code.str());
+    EXPECT_EQ(Expected.str(), format(Expected, Style, Ranges))
+        << "Expected code is not stable";
+    EXPECT_EQ(Expected.str(), format(Code, Style, Ranges));
+    if (Style.Language == FormatStyle::LK_Cpp && Ranges.empty()) {
+      // Objective-C++ is a superset of C++, so everything checked for C++
+      // needs to be checked for Objective-C++ as well.
+      FormatStyle ObjCStyle = Style;
+      ObjCStyle.Language = FormatStyle::LK_ObjC;
+      EXPECT_EQ(Expected.str(), format(test::messUp(Code), ObjCStyle, Ranges));
+    }
+  }
+
+  void _verifyFormat(const char *File, int Line, llvm::StringRef Code,
+                     const FormatStyle &Style = getLLVMStyle(),
+                     const std::vector<tooling::Range> &Ranges = {}) {
+    _verifyFormat(File, Line, Code, Code, Style, Ranges);
+  }
+
+  int ReplacementCount;
+};
+
+#define verifyFormat(...) _verifyFormat(__FILE__, __LINE__, __VA_ARGS__)
+
+TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
+  FormatStyle Style = getLLVMStyle();
+  EXPECT_EQ(Style.Language, FormatStyle::LK_Cpp);
+  EXPECT_EQ(Style.IntegerLiteralSeparator.Binary, 0);
+  EXPECT_EQ(Style.IntegerLiteralSeparator.Decimal, 0);
+  EXPECT_EQ(Style.IntegerLiteralSeparator.Hex, 0);
+
+  const StringRef Binary("b = 0b10011'11'0110'1u;");
+  verifyFormat(Binary, Style);
+  Style.IntegerLiteralSeparator.Binary = -1;
+  verifyFormat("b = 0b100111101101u;", Binary, Style);
+  Style.IntegerLiteralSeparator.Binary = 1;
+  verifyFormat("b = 0b1'0'0'1'1'1'1'0'1'1'0'1u;", Binary, Style);
+  Style.IntegerLiteralSeparator.Binary = 4;
+  verifyFormat("b = 0b1001'1110'1101u;", Binary, Style);
+
+  const StringRef Decimal("d = 184467'440737'0'95505'92Ull;");
+  verifyFormat(Decimal, Style);
+  Style.IntegerLiteralSeparator.Decimal = -1;
+  verifyFormat("d = 18446744073709550592Ull;", Decimal, Style);
+  Style.IntegerLiteralSeparator.Decimal = 3;
+  verifyFormat("d = 18'446'744'073'709'550'592Ull;", Decimal, Style);
+
+  const StringRef Hex("h = 0xDEAD'BEEF'DE'AD'BEE'Fuz;");
+  verifyFormat(Hex, Style);
+  Style.IntegerLiteralSeparator.Hex = -1;
+  verifyFormat("h = 0xDEADBEEFDEADBEEFuz;", Hex, Style);
+  Style.IntegerLiteralSeparator.Hex = 2;
+  verifyFormat("h = 0xDE'AD'BE'EF'DE'AD'BE'EFuz;", Hex, Style);
+
+  verifyFormat("o0 = 0;\n"
+               "o1 = 07;\n"
+               "o5 = 012345",
+               Style);
+}
+
+TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
+  FormatStyle Style = getLLVMStyle();
+  const StringRef Binary("B = 0B10011_11_0110_1;");
+  const StringRef Decimal("d = 184467_440737_0_95505_92;");
+  const StringRef Hex("H = 0XDEAD_BEEF_DE_AD_BEE_F;");
+
+  auto TestUnderscore = [&](auto Language) {
+    Style.Language = Language;
+
+    Style.IntegerLiteralSeparator.Binary = 0;
+    verifyFormat(Binary, Style);
+    Style.IntegerLiteralSeparator.Binary = -1;
+    verifyFormat("B = 0B100111101101;", Binary, Style);
+    Style.IntegerLiteralSeparator.Binary = 4;
+    verifyFormat("B = 0B1001_1110_1101;", Binary, Style);
+
+    Style.IntegerLiteralSeparator.Decimal = 0;
+    verifyFormat(Decimal, Style);
+    Style.IntegerLiteralSeparator.Decimal = -1;
+    verifyFormat("d = 18446744073709550592;", Decimal, Style);
+    Style.IntegerLiteralSeparator.Decimal = 3;
+    verifyFormat("d = 18_446_744_073_709_550_592;", Decimal, Style);
+
+    Style.IntegerLiteralSeparator.Hex = 0;
+    verifyFormat(Hex, Style);
+    Style.IntegerLiteralSeparator.Hex = -1;
+    verifyFormat("H = 0XDEADBEEFDEADBEEF;", Hex, Style);
+    Style.IntegerLiteralSeparator.Hex = 2;
+    verifyFormat("H = 0XDE_AD_BE_EF_DE_AD_BE_EF;", Hex, Style);
+  };
+
+  TestUnderscore(FormatStyle::LK_CSharp);
+  TestUnderscore(FormatStyle::LK_Java);
+  TestUnderscore(FormatStyle::LK_JavaScript);
+
+  verifyFormat("d = 9_007_199_254_740_995n;", Style);
+  verifyFormat("d = 9_007_199_254_740_995n;", "d = 9007199254740995n;", Style);
+
+  Style.IntegerLiteralSeparator.Binary = 8;
+  verifyFormat(
+      "b = 0b100000_00000000_00000000_00000000_00000000_00000000_00000011n;",
+      "b = 0b100000000000000000000000000000000000000000000000000011n;", Style);
+
+  verifyFormat("h = 0x20_00_00_00_00_00_03n;", Style);
+  verifyFormat("h = 0x20_00_00_00_00_00_03n;", "h = 0x20000000000003n;", Style);
+
+  verifyFormat("o = 0o400000000000000003n;", Style);
+}
+
+TEST_F(IntegerLiteralSeparatorTest, FixRanges) {
+  FormatStyle Style = getLLVMStyle();
+  Style.IntegerLiteralSeparator.Decimal = 3;
+
+  const StringRef Code("i = -12'34;\n"
+                       "// clang-format off\n"
+                       "j = 123'4;\n"
+                       "// clang-format on\n"
+                       "k = +1'23'4;");
+  const StringRef Expected("i = -1'234;\n"
+                           "// clang-format off\n"
+                           "j = 123'4;\n"
+                           "// clang-format on\n"
+                           "k = +1'234;");
+
+  verifyFormat(Expected, Code, Style);
+
+  verifyFormat("i = -1'234;\n"
+               "// clang-format off\n"
+               "j = 123'4;\n"
+               "// clang-format on\n"
+               "k = +1'23'4;",
+               Code, Style, {tooling::Range(0, 11)}); // line 1
+
+  verifyFormat(Code, Style, {tooling::Range(32, 10)}); // line 3
+
+  verifyFormat("i = -12'34;\n"
+               "// clang-format off\n"
+               "j = 123'4;\n"
+               "// clang-format on\n"
+               "k = +1'234;",
+               Code, Style, {tooling::Range(61, 12)}); // line 5
+
+  verifyFormat(Expected, Code, Style,
+               {tooling::Range(0, 11), tooling::Range(61, 12)}); // lines 1, 5
+}
+
+TEST_F(IntegerLiteralSeparatorTest, FloatingPoint) {
+  FormatStyle Style = getLLVMStyle();
+  Style.IntegerLiteralSeparator.Decimal = 3;
+  Style.IntegerLiteralSeparator.Hex = 2;
+
+  verifyFormat("d0 = .0;\n"
+               "d1 = 0.;\n"
+               "y = 7890.;\n"
+               "E = 3456E2;\n"
+               "p = 0xABCp2;",
+               Style);
+
+  Style.Language = FormatStyle::LK_JavaScript;
+  verifyFormat("y = 7890.;\n"
+               "e = 3456e2;",
+               Style);
+
+  Style.Language = FormatStyle::LK_Java;
+  verifyFormat("y = 7890.;\n"
+               "E = 3456E2;\n"
+               "P = 0xABCP2;\n"
+               "f = 1234f;\n"
+               "D = 5678D;",
+               Style);
+
+  Style.Language = FormatStyle::LK_CSharp;
+  verifyFormat("y = 7890.;\n"
+               "e = 3456e2;\n"
+               "F = 1234F;\n"
+               "d = 5678d;\n"
+               "M = 9012M",
+               Style);
+}
+
+} // namespace
+} // namespace format
+} // namespace clang


        


More information about the cfe-commits mailing list