[llvm] 5a1958f - Syndicate, test and fix base64 implementation

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 01:02:40 PST 2020


Author: serge-sans-paille
Date: 2020-03-02T10:02:25+01:00
New Revision: 5a1958f2673f8c771e406a7e309e160b432c9a79

URL: https://github.com/llvm/llvm-project/commit/5a1958f2673f8c771e406a7e309e160b432c9a79
DIFF: https://github.com/llvm/llvm-project/commit/5a1958f2673f8c771e406a7e309e160b432c9a79.diff

LOG: Syndicate, test and fix base64 implementation

Move Base64 implementation from clangd/SemanticHighlighting to
llvm/Support/Base64, fix its implementation and provide a decent test suite.

The previous implementation used the + operator instead of | to combine the
shifted bytes, which is a problem when the bytes are signed. A char holding
0xFF is promoted to a (signed) int with value -1, so shifting it left by 16
yields 0xffff0000, which is negative. Combining negative numbers with + in that
context is not what we want to do.

This fixes https://github.com/llvm/llvm-project/issues/149.

Differential Revision: https://reviews.llvm.org/D75057

Added: 
    llvm/include/llvm/Support/Base64.h
    llvm/unittests/Support/Base64Test.cpp

Modified: 
    clang-tools-extra/clangd/SemanticHighlighting.cpp
    compiler-rt/lib/fuzzer/FuzzerUtil.cpp
    llvm/unittests/Support/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 5c71db5d80b3..e7b1618fd2d4 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/Casting.h"
 #include <algorithm>
 
@@ -283,37 +284,6 @@ class CollectExtraHighlightings
   HighlightingsBuilder &H;
 };
 
-// Encode binary data into base64.
-// This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
-// FIXME: Factor this out into llvm/Support?
-std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) {
-  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-                              "abcdefghijklmnopqrstuvwxyz"
-                              "0123456789+/";
-  std::string Res;
-  size_t I;
-  for (I = 0; I + 2 < Bytes.size(); I += 3) {
-    uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8) + Bytes[I + 2];
-    Res += Table[(X >> 18) & 63];
-    Res += Table[(X >> 12) & 63];
-    Res += Table[(X >> 6) & 63];
-    Res += Table[X & 63];
-  }
-  if (I + 1 == Bytes.size()) {
-    uint32_t X = (Bytes[I] << 16);
-    Res += Table[(X >> 18) & 63];
-    Res += Table[(X >> 12) & 63];
-    Res += "==";
-  } else if (I + 2 == Bytes.size()) {
-    uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8);
-    Res += Table[(X >> 18) & 63];
-    Res += Table[(X >> 12) & 63];
-    Res += Table[(X >> 6) & 63];
-    Res += "=";
-  }
-  return Res;
-}
-
 void write32be(uint32_t I, llvm::raw_ostream &OS) {
   std::array<char, 4> Buf;
   llvm::support::endian::write32be(Buf.data(), I);

diff  --git a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
index 7aa84a1faad7..87180d1ea85d 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
@@ -151,32 +151,36 @@ bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units) {
   return true;
 }
 
+// Code duplicated (and tested) in llvm/include/llvm/Support/Base64.h
 std::string Base64(const Unit &U) {
   static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               "abcdefghijklmnopqrstuvwxyz"
                               "0123456789+/";
-  std::string Res;
-  size_t i;
-  for (i = 0; i + 2 < U.size(); i += 3) {
-    uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2];
-    Res += Table[(x >> 18) & 63];
-    Res += Table[(x >> 12) & 63];
-    Res += Table[(x >> 6) & 63];
-    Res += Table[x & 63];
+  std::string Buffer;
+  Buffer.resize(((U.size() + 2) / 3) * 4);
+
+  size_t i = 0, j = 0;
+  for (size_t n = U.size() / 3 * 3; i < n; i += 3, j += 4) {
+    uint32_t x = (U[i] << 16) | (U[i + 1] << 8) | U[i + 2];
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = Table[(x >> 6) & 63];
+    Buffer[j + 3] = Table[x & 63];
   }
   if (i + 1 == U.size()) {
     uint32_t x = (U[i] << 16);
-    Res += Table[(x >> 18) & 63];
-    Res += Table[(x >> 12) & 63];
-    Res += "==";
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = '=';
+    Buffer[j + 3] = '=';
   } else if (i + 2 == U.size()) {
-    uint32_t x = (U[i] << 16) + (U[i + 1] << 8);
-    Res += Table[(x >> 18) & 63];
-    Res += Table[(x >> 12) & 63];
-    Res += Table[(x >> 6) & 63];
-    Res += "=";
+    uint32_t x = (U[i] << 16) | (U[i + 1] << 8);
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    Buffer[j + 2] = Table[(x >> 6) & 63];
+    Buffer[j + 3] = '=';
   }
-  return Res;
+  return Buffer;
 }
 
 static std::mutex SymbolizeMutex;

diff  --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
new file mode 100644
index 000000000000..3f6616633e5f
--- /dev/null
+++ b/llvm/include/llvm/Support/Base64.h
@@ -0,0 +1,53 @@
+//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides generic base64 encoder/decoder.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BASE64_H
+#define LLVM_SUPPORT_BASE64_H
+
+#include <string>
+
+namespace llvm {
+
+/// Encodes \p Bytes, a container of byte-sized elements providing size() and
+/// operator[], as RFC 4648 base64 with '=' padding; returns the encoded text.
+template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
+  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                              "abcdefghijklmnopqrstuvwxyz"
+                              "0123456789+/";
+  // Widen through unsigned char: where char is signed, a byte >= 0x80 would
+  // otherwise sign-extend and corrupt the bits of the bytes OR'ed in above it.
+  auto Byte = [&Bytes](size_t Idx) -> uint32_t {
+    return static_cast<unsigned char>(Bytes[Idx]);
+  };
+  const size_t Size = Bytes.size();
+  // Every output group starts out as padding; real digits overwrite it below.
+  std::string Buffer(((Size + 2) / 3) * 4, '=');
+
+  size_t j = 0;
+  for (size_t i = 0; i < Size; i += 3, j += 4) {
+    // Bytes available for this group: 3, or 1-2 for a trailing partial group.
+    const size_t Avail = Size - i < 3 ? Size - i : 3;
+    const uint32_t x = (Byte(i) << 16) | (Avail > 1 ? Byte(i + 1) << 8 : 0) |
+                       (Avail > 2 ? Byte(i + 2) : 0);
+    Buffer[j + 0] = Table[(x >> 18) & 63];
+    Buffer[j + 1] = Table[(x >> 12) & 63];
+    if (Avail > 1)
+      Buffer[j + 2] = Table[(x >> 6) & 63];
+    if (Avail > 2)
+      Buffer[j + 3] = Table[x & 63];
+  }
+  return Buffer;
+}
+
+} // end namespace llvm
+
+#endif

diff  --git a/llvm/unittests/Support/Base64Test.cpp b/llvm/unittests/Support/Base64Test.cpp
new file mode 100644
index 000000000000..0424678fe9c1
--- /dev/null
+++ b/llvm/unittests/Support/Base64Test.cpp
@@ -0,0 +1,53 @@
+//===- llvm/unittest/Support/Base64Test.cpp - Base64 tests
+//--------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unit tests for the Base64 functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Base64.h"
+#include "llvm/ADT/StringRef.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+/// Checks that encodeBase64 turns the bytes of \p Input into exactly \p Final.
+void TestBase64(StringRef Input, StringRef Final) {
+  auto Res = encodeBase64(Input);
+  EXPECT_EQ(Res, Final);
+}
+
+} // namespace
+
+TEST(Base64Test, Base64) {
+  // from: https://tools.ietf.org/html/rfc4648#section-10
+  TestBase64("", "");
+  TestBase64("f", "Zg==");
+  TestBase64("fo", "Zm8=");
+  TestBase64("foo", "Zm9v");
+  TestBase64("foob", "Zm9vYg==");
+  TestBase64("fooba", "Zm9vYmE=");
+  TestBase64("foobar", "Zm9vYmFy");
+
+  // With non-printable values.
+  char NonPrintableVector[] = {0x00, 0x00, 0x00,       0x46,
+                               0x00, 0x08, (char)0xff, (char)0xee};
+  TestBase64(StringRef(NonPrintableVector, sizeof(NonPrintableVector)),
+             "AAAARgAI/+4=");
+
+  // Large test case; not NUL-terminated, so the length is passed explicitly.
+  char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
+                        0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
+                        0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
+                        0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
+                        0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
+  TestBase64(StringRef(LargeVector, sizeof(LargeVector)),
+             "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=");
+}

diff  --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index 161891517cf3..ebb7aaa3ca75 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_unittest(SupportTests
   AnnotationsTest.cpp
   ARMAttributeParser.cpp
   ArrayRecyclerTest.cpp
+  Base64Test.cpp
   BinaryStreamTest.cpp
   BlockFrequencyTest.cpp
   BranchProbabilityTest.cpp


        


More information about the llvm-commits mailing list