[libc-commits] [libc] dc13a9a - [libc] Add strcspn and strpbrk implementation.

via libc-commits libc-commits at lists.llvm.org
Fri Aug 7 13:14:59 PDT 2020


Author: cgyurgyik
Date: 2020-08-07T16:14:32-04:00
New Revision: dc13a9a7813768e01bddd03924d6cac6fa45cd7b

URL: https://github.com/llvm/llvm-project/commit/dc13a9a7813768e01bddd03924d6cac6fa45cd7b
DIFF: https://github.com/llvm/llvm-project/commit/dc13a9a7813768e01bddd03924d6cac6fa45cd7b.diff

LOG: [libc] Add strcspn and strpbrk implementation.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D85386

Added: 
    libc/src/string/strcspn.cpp
    libc/src/string/strcspn.h
    libc/src/string/string_utils.h
    libc/src/string/strpbrk.cpp
    libc/src/string/strpbrk.h
    libc/test/src/string/strcspn_test.cpp
    libc/test/src/string/strpbrk_test.cpp

Modified: 
    libc/config/linux/aarch64/entrypoints.txt
    libc/config/linux/x86_64/entrypoints.txt
    libc/src/string/CMakeLists.txt
    libc/src/string/strspn.cpp
    libc/test/src/string/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index e1f54bfaa4fd..aad7819ef551 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -27,8 +27,10 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strcat
     libc.src.string.strchr
     libc.src.string.strcpy
+    libc.src.string.strcspn
     libc.src.string.strlen
     libc.src.string.strnlen
+    libc.src.string.strpbrk
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index db76c351db7f..ca3a2bd6c477 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -45,8 +45,10 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strchr
     libc.src.string.strcmp
     libc.src.string.strcpy
+    libc.src.string.strcspn
     libc.src.string.strlen
     libc.src.string.strnlen
+    libc.src.string.strpbrk
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr

diff  --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 82d3457ae340..b6a15c3f460b 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -1,5 +1,13 @@
 add_subdirectory(memory_utils)
 
+add_header_library(
+  string_utils
+  HDRS
+    string_utils.h
+  DEPENDS
+    libc.utils.CPP.standalone_cpp
+)
+
 add_entrypoint_object(
   strcat
   SRCS
@@ -94,12 +102,34 @@ add_entrypoint_object(
     strrchr.h
 )
 
+add_entrypoint_object(
+  strcspn
+  SRCS
+    strcspn.cpp
+  HDRS
+    strcspn.h
+  DEPENDS
+    .string_utils
+)
+
 add_entrypoint_object(
   strspn
   SRCS
     strspn.cpp
   HDRS
     strspn.h
+  DEPENDS
+    libc.utils.CPP.standalone_cpp
+)
+
+add_entrypoint_object(
+  strpbrk
+  SRCS
+    strpbrk.cpp
+  HDRS
+    strpbrk.h
+  DEPENDS
+    .string_utils
 )
 
 # Helper to define a function with multiple implementations

diff  --git a/libc/src/string/strcspn.cpp b/libc/src/string/strcspn.cpp
new file mode 100644
index 000000000000..a19b3fb16820
--- /dev/null
+++ b/libc/src/string/strcspn.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of strcspn -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strcspn.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+// Counts the leading characters of 'src' that do not occur in 'segment'.
+size_t LLVM_LIBC_ENTRYPOINT(strcspn)(const char *src, const char *segment) {
+  return internal::complementary_span(src, segment);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/string/strcspn.h b/libc/src/string/strcspn.h
new file mode 100644
index 000000000000..9674f4b6359b
--- /dev/null
+++ b/libc/src/string/strcspn.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for strcspn -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRCSPN_H
+#define LLVM_LIBC_SRC_STRING_STRCSPN_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+// Length of the longest prefix of 'src' with no character from 'segment'.
+size_t strcspn(const char *src, const char *segment);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRCSPN_H

diff  --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
new file mode 100644
index 000000000000..54c1bac96985
--- /dev/null
+++ b/libc/src/string/string_utils.h
@@ -0,0 +1,36 @@
+//===-- String utils --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_STRING_STRING_UTILS_H
+#define LIBC_SRC_STRING_STRING_UTILS_H
+
+#include "src/string/memory_utils/utils.h"
+
+#include "utils/CPP/Bitset.h"
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+namespace internal {
+
+// Returns the length of the longest prefix of 'src' containing no character
+// from 'segment' (the length of 'src' if none occurs). Each byte is indexed as
+// unsigned char so values >= 0x80 stay within the 256-bit set.
+static inline size_t complementary_span(const char *src, const char *segment) {
+  const char *initial = src;
+  cpp::Bitset<256> bitset;
+  for (; *segment; ++segment)
+    bitset.set(static_cast<unsigned char>(*segment));
+  for (; *src && !bitset.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  return src - initial;
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif //  LIBC_SRC_STRING_STRING_UTILS_H

diff  --git a/libc/src/string/strpbrk.cpp b/libc/src/string/strpbrk.cpp
new file mode 100644
index 000000000000..bbd2cec9188f
--- /dev/null
+++ b/libc/src/string/strpbrk.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strpbrk -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strpbrk.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+// Points at the first 'src' character present in 'breakset', else nullptr.
+char *LLVM_LIBC_ENTRYPOINT(strpbrk)(const char *src, const char *breakset) {
+  const char *match = src + internal::complementary_span(src, breakset);
+  return *match == '\0' ? nullptr : const_cast<char *>(match);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/string/strpbrk.h b/libc/src/string/strpbrk.h
new file mode 100644
index 000000000000..823cd35e9e30
--- /dev/null
+++ b/libc/src/string/strpbrk.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strpbrk -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRPBRK_H
+#define LLVM_LIBC_SRC_STRING_STRPBRK_H
+
+namespace __llvm_libc {
+// First character of 'src' that occurs in 'breakset'; nullptr if none.
+char *strpbrk(const char *src, const char *breakset);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRPBRK_H

diff  --git a/libc/src/string/strspn.cpp b/libc/src/string/strspn.cpp
index f01bc01345de..c85fc8b69ee0 100644
--- a/libc/src/string/strspn.cpp
+++ b/libc/src/string/strspn.cpp
@@ -11,7 +11,6 @@
 #include "src/__support/common.h"
 #include "utils/CPP/Bitset.h"
 #include <stddef.h>
-#include <stdint.h>
 
 namespace __llvm_libc {
 

diff  --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index e1db8d67becd..f7c580dd1876 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -102,6 +102,16 @@ add_libc_unittest(
     libc.src.string.strrchr
 )
 
+add_libc_unittest(
+  strcspn_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strcspn_test.cpp
+  DEPENDS
+    libc.src.string.strcspn
+)
+
 add_libc_unittest(
   strspn_test
   SUITE
@@ -112,6 +122,15 @@ add_libc_unittest(
     libc.src.string.strspn
 )
 
+add_libc_unittest(
+  strpbrk_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strpbrk_test.cpp
+  DEPENDS
+    libc.src.string.strpbrk
+)
 
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)

diff  --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp
new file mode 100644
index 000000000000..80de495f9623
--- /dev/null
+++ b/libc/test/src/string/strcspn_test.cpp
@@ -0,0 +1,50 @@
+//===-- Unittests for strcspn ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strcspn.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(StrCSpnTest, ComplementarySpanShouldNotGoPastNullTerminator) {
+  const char src[5] = {'a', 'b', '\0', 'c', 'd'};
+  EXPECT_EQ(__llvm_libc::strcspn(src, "b"), size_t{1});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "d"), size_t{2});
+
+  // Same goes for the segment to be searched for.
+  const char segment[5] = {'1', '2', '\0', '3', '4'};
+  EXPECT_EQ(__llvm_libc::strcspn("123", segment), size_t{0});
+}
+
+TEST(StrCSpnTest, ComplementarySpanForEachIndividualCharacter) {
+  const char *src = "12345";
+  // The complementary span size should increment accordingly.
+  EXPECT_EQ(__llvm_libc::strcspn(src, "1"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "2"), size_t{1});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "3"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "4"), size_t{3});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "5"), size_t{4});
+}
+
+TEST(StrCSpnTest, ComplementarySpanIsStringLengthIfNoCharacterFound) {
+  // Null terminator.
+  EXPECT_EQ(__llvm_libc::strcspn("", ""), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("", "_"), size_t{0});
+  // Single character.
+  EXPECT_EQ(__llvm_libc::strcspn("a", "b"), size_t{1});
+  // Multiple characters.
+  EXPECT_EQ(__llvm_libc::strcspn("abc", "1"), size_t{3});
+}
+
+TEST(StrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) {
+  // Complementary span should be zero in all these cases.
+  EXPECT_EQ(__llvm_libc::strcspn("a", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aa", "a"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaa", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaaa", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaaa", "baa"), size_t{0});
+}

diff  --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp
new file mode 100644
index 000000000000..498e8473a890
--- /dev/null
+++ b/libc/test/src/string/strpbrk_test.cpp
@@ -0,0 +1,62 @@
+//===-- Unittests for strpbrk ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strpbrk.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(StrPBrkTest, EmptyStringShouldReturnNullptr) {
+  // The search should not include the null terminator.
+  EXPECT_STREQ(__llvm_libc::strpbrk("", ""), nullptr);
+  EXPECT_STREQ(__llvm_libc::strpbrk("_", ""), nullptr);
+  EXPECT_STREQ(__llvm_libc::strpbrk("", "_"), nullptr);
+}
+
+TEST(StrPBrkTest, ShouldNotFindAnythingAfterNullTerminator) {
+  const char src[4] = {'a', 'b', '\0', 'c'};
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "c"), nullptr);
+}
+
+TEST(StrPBrkTest, ShouldReturnNullptrIfNoCharactersFound) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("12345", "abcdef"), nullptr);
+}
+
+TEST(StrPBrkTest, FindsFirstCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "1"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "-1"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "1_"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "f1_"), "12345");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsMiddleCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "3"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "?3"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "3F"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "z3_"), "345");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsLastCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "5"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "r5"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "59"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "n5_"), "5");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsFirstOfRepeated) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("A,B,C,D", ","), ",B,C,D");
+}
+
+TEST(StrPBrkTest, FindsFirstInBreakset) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("12345", "34"), "345");
+}


        


More information about the libc-commits mailing list