[libc-commits] [libc] 5fd03c8 - [libc] Implement getopt

Alex Brachet via libc-commits libc-commits at lists.llvm.org
Mon Oct 31 09:56:18 PDT 2022


Author: Alex Brachet
Date: 2022-10-31T16:55:53Z
New Revision: 5fd03c81767f27ef190ca08ca940cf32a62417dd

URL: https://github.com/llvm/llvm-project/commit/5fd03c81767f27ef190ca08ca940cf32a62417dd
DIFF: https://github.com/llvm/llvm-project/commit/5fd03c81767f27ef190ca08ca940cf32a62417dd.diff

LOG: [libc] Implement getopt

Differential Revision: https://reviews.llvm.org/D133487

Added: 
    libc/include/llvm-libc-types/__getoptargv_t.h
    libc/src/unistd/getopt.cpp
    libc/src/unistd/getopt.h
    libc/test/src/unistd/getopt_test.cpp

Modified: 
    libc/config/linux/aarch64/entrypoints.txt
    libc/config/linux/api.td
    libc/config/linux/x86_64/entrypoints.txt
    libc/include/CMakeLists.txt
    libc/include/llvm-libc-types/CMakeLists.txt
    libc/spec/posix.td
    libc/src/unistd/CMakeLists.txt
    libc/test/src/unistd/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 51da5f3c899d6..5b2e609be3e74 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -432,6 +432,11 @@ if(LLVM_LIBC_FULL_BUILD)
     # unistd.h entrypoints
     libc.src.unistd.environ
     libc.src.unistd.execv
+    libc.src.unistd.getopt
+    libc.src.unistd.optarg
+    libc.src.unistd.optind
+    libc.src.unistd.optopt
+    libc.src.unistd.opterr
 
     # sys/select.h entrypoints
     libc.src.sys.select.select

diff  --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 6b69e3ff85f4a..25e19e83fbcf9 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -268,7 +268,7 @@ def DirentAPI : PublicAPI<"dirent.h"> {
 }
 
 def UniStdAPI : PublicAPI<"unistd.h"> {
-  let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", "uid_t"];
+  let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", "uid_t", "__getoptargv_t"];
 }
 
 def SysRandomAPI : PublicAPI<"sys/random.h"> {

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 40c6c33ab05c9..56e38f0f13fdd 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -457,6 +457,11 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.unistd.execv
     libc.src.unistd.fork
     libc.src.unistd.__llvm_libc_syscall
+    libc.src.unistd.getopt
+    libc.src.unistd.optarg
+    libc.src.unistd.optind
+    libc.src.unistd.optopt
+    libc.src.unistd.opterr
 
     # sys/select.h entrypoints
     libc.src.sys.select.select

diff  --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index cb0e885622bc7..3e63695e12a2a 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -185,6 +185,7 @@ add_gen_header(
     .llvm-libc-types.size_t
     .llvm-libc-types.ssize_t
     .llvm-libc-types.uid_t
+    .llvm-libc-types.__getoptargv_t
 )
 
 add_gen_header(

diff  --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 1c667d5a55b7a..e4e9567d5fdc9 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -81,3 +81,4 @@ add_header(__atexithandler_t HDR __atexithandler_t.h)
 add_header(speed_t HDR speed_t.h)
 add_header(tcflag_t HDR tcflag_t.h)
 add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
+add_header(__getoptargv_t HDR __getoptargv_t.h)

diff  --git a/libc/include/llvm-libc-types/__getoptargv_t.h b/libc/include/llvm-libc-types/__getoptargv_t.h
new file mode 100644
index 0000000000000..81c67286c3a76
--- /dev/null
+++ b/libc/include/llvm-libc-types/__getoptargv_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __getoptargv_t ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_GETOPTARGV_T_H__
+#define __LLVM_LIBC_TYPES_GETOPTARGV_T_H__
+
+typedef char *const __getoptargv_t[];
+
+#endif // __LLVM_LIBC_TYPES_GETOPTARGV_T_H__

diff  --git a/libc/spec/posix.td b/libc/spec/posix.td
index e54b48887bef4..43b3319059c98 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -74,6 +74,8 @@ def FdSet : NamedType<"fd_set">;
 def FdSetPtr : PtrType<FdSet>;
 def RestrictedFdSetPtr : RestrictedPtrType<FdSet>;
 
+def GetoptArgvT : NamedType<"__getoptargv_t">;
+
 def POSIX : StandardSpec<"POSIX"> {
   PtrType CharPtr = PtrType<CharType>;
   RestrictedPtrType RestrictedCharPtr = RestrictedPtrType<CharType>;
@@ -348,6 +350,7 @@ def POSIX : StandardSpec<"POSIX"> {
       SizeTType,
       PidT,
       UidT,
+      GetoptArgvT,
     ],
     [], // Enumerations
     [
@@ -581,9 +584,30 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<SSizeTType>,
           [ArgSpec<IntType>, ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>]
         >,
+        FunctionSpec<
+          "getopt",
+          RetValSpec<IntType>,
+          [ArgSpec<IntType>, ArgSpec<GetoptArgvT>, ArgSpec<ConstCharPtr>]
+        >,
     ],
     [
         ObjectSpec<"environ", "char **">,
+        ObjectSpec<
+          "optarg",
+          "char *"
+        >,
+        ObjectSpec<
+          "optind",
+          "int"
+        >,
+        ObjectSpec<
+          "opterr",
+          "int"
+        >,
+        ObjectSpec<
+          "optopt",
+          "int"
+        >,
     ]
   >;
 

diff  --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index cd4002e35d7a1..ab05175c25b77 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -254,3 +254,37 @@ add_entrypoint_object(
   HDRS
     environ.h
 )
+
+add_entrypoint_object(
+  getopt
+  SRCS
+    getopt.cpp
+  HDRS
+    getopt.h
+  DEPENDS
+    libc.include.unistd
+    libc.src.__support.CPP.optional
+    libc.src.__support.CPP.string_view
+    libc.src.__support.File.file
+    libc.src.stdio.fprintf
+)
+
+# These aren't actually external per se, but this is an easy way to create
+# targets that don't do anything. They exist to be referenced in entrypoints.txt
+# so that the header will properly expose their declarations. Their definitions
+# are in getopt.cpp.
+add_entrypoint_external(
+  optarg
+)
+
+add_entrypoint_external(
+  optind
+)
+
+add_entrypoint_external(
+  optopt
+)
+
+add_entrypoint_external(
+  opterr
+)

diff  --git a/libc/src/unistd/getopt.cpp b/libc/src/unistd/getopt.cpp
new file mode 100644
index 0000000000000..9d3b53400560e
--- /dev/null
+++ b/libc/src/unistd/getopt.cpp
@@ -0,0 +1,200 @@
+//===-- Implementation of getopt ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/getopt.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/File/file.h"
+#include "src/__support/common.h"
+#include "src/stdio/fprintf.h"
+
+#include <stdio.h>
+
+// This implementation follows POSIX and does not support GNU extensions. The
+// main GNU extension left out is the re-ordering of argv elements, which lets
+// callers iterate easily over the non-option arguments.
+
+namespace __llvm_libc {
+
+// Rebindable stand-in for a `T &`. It stores a pointer so that a whole
+// GetoptContext can be reassigned (a struct holding real references could
+// not), while reads go through the implicit conversion and writes through
+// get().
+template <typename T> struct RefWrapper {
+  // The referenced object; never owned by the wrapper.
+  T *ptr;
+
+  // Intentionally implicit: GetoptContext is aggregate-initialized directly
+  // from raw pointers.
+  RefWrapper(T *target) : ptr{target} {}
+  RefWrapper &operator=(const RefWrapper &) = default;
+
+  // Explicit access to the referenced object, used for assignments.
+  T &get() { return *ptr; }
+  // Implicit access to the referenced object, used when reading the value.
+  operator T &() { return get(); }
+};
+
+// All of the state getopt_r() reads or writes, gathered in one place so the
+// parser can run against either the public globals or test-owned variables.
+// The member order is significant: instances are aggregate-initialized
+// positionally.
+struct GetoptContext {
+  RefWrapper<char *> optarg; // Argument of the most recently parsed option.
+  RefWrapper<int> optind;    // Index of the argv element being examined.
+  RefWrapper<int> optopt;    // Option character that caused the last error.
+  RefWrapper<unsigned> optpos; // Offset inside argv[optind]; 0 means "start".
+
+  // Held by value, unlike the fields above: this is a snapshot of the flag
+  // taken when the context was built, not a live view of a caller's variable.
+  int opterr;
+
+  // Stream that diagnostics are written to.
+  FILE *errstream;
+
+  GetoptContext &operator=(const GetoptContext &) = default;
+
+  // Formats a diagnostic to errstream; does nothing when opterr is zero.
+  template <typename... Ts> void report_error(const char *fmt, Ts... ts) {
+    if (!opterr)
+      return;
+    __llvm_libc::fprintf(errstream, fmt, ts...);
+  }
+};
+
+// Range over the option characters declared in a getopt optstring. Each
+// element is the option character plus a flag saying whether it is followed
+// by ':' (i.e. whether the option takes an argument). A leading '-', '+' or
+// ':' in the optstring is a mode marker rather than an option and is skipped.
+struct OptstringParser {
+  using value_type = struct {
+    char c;   // The option character.
+    bool arg; // True if the option is declared with a trailing ':'.
+  };
+
+  cpp::string_view optstring;
+
+  struct iterator {
+    // Unvisited tail of the optstring.
+    cpp::string_view curr;
+
+    iterator operator++() {
+      curr = curr.substr(1);
+      return *this;
+    }
+
+    // Both iterators view the same buffer, so comparing the data pointers is
+    // enough to detect the end.
+    bool operator!=(iterator other) { return curr.data() != other.curr.data(); }
+
+    // Dereferencing is NOT idempotent: when the option is followed by ':' the
+    // iterator also steps over that ':' so the next increment lands on the
+    // next option character. It must be called exactly once per position,
+    // which is what a range-based for loop does.
+    value_type operator*() {
+      value_type r{curr.front(), false};
+      cpp::string_view rest = curr.substr(1);
+      if (!rest.empty() && rest.front() == ':') {
+        this->operator++();
+        r.arg = true;
+      }
+      return r;
+    }
+  };
+
+  iterator begin() {
+    // front() has a non-empty precondition; an empty optstring simply yields
+    // an empty range (begin() == end()).
+    if (optstring.empty())
+      return {optstring};
+
+    const char first = optstring.front();
+    const bool skip = first == '-' || first == '+' || first == ':';
+    return {optstring.substr(skip ? 1 : 0)};
+  }
+
+  iterator end() { return {optstring.substr(optstring.size())}; }
+};
+
+// Reentrant core of getopt(): consumes at most one option character per call,
+// with all cursor state living in `ctx` instead of in globals.
+//
+// Returns:
+//   * the matched option character on success;
+//   * -1 when argv[optind] is missing, is not an option (does not start with
+//     '-', or is exactly "-"), or is "--" (which is consumed);
+//   * '?' for an option character not present in `optstring`;
+//   * ':' or '?' for a missing option-argument, depending on whether
+//     `optstring` starts with ':'.
+// On either error, ctx.optopt is set to the offending option character.
+//
+// NOTE(review): after an error return neither optind nor optpos advances, so
+// calling again re-parses the same argv element; a typical
+// `while (getopt(...) != -1)` loop would not terminate. POSIX/glibc move past
+// the bad option. The accompanying unit tests assert the current behaviour
+// (optind stays put), so it is left unchanged here.
+// NOTE(review): POSIX suppresses the missing-argument diagnostic when
+// optstring starts with ':'; this implementation still prints it, and the
+// unit tests expect that.
+int getopt_r(int argc, char *const argv[], const char *optstring,
+             GetoptContext &ctx) {
+  // Every non-success exit rewinds the intra-argument cursor.
+  auto failure = [&ctx](int ret = -1) {
+    ctx.optpos.get() = 0;
+    return ret;
+  };
+
+  if (ctx.optind >= argc || !argv[ctx.optind])
+    return failure();
+
+  // The not-yet-parsed part of the current argv element.
+  cpp::string_view current =
+      cpp::string_view{argv[ctx.optind]}.substr(ctx.optpos);
+
+  auto move_forward = [&current, &ctx] {
+    current = current.substr(1);
+    ctx.optpos.get()++;
+  };
+
+  // If optpos is nonzero, then we are already parsing a valid flag and these
+  // need not be checked.
+  if (ctx.optpos == 0) {
+    // An empty argv element is not an option either; test for it explicitly
+    // rather than indexing into an empty view.
+    if (current.empty() || current[0] != '-')
+      return failure();
+
+    if (current == "--") {
+      ctx.optind.get()++;
+      return failure();
+    }
+
+    // Eat the '-' char.
+    move_forward();
+    if (current.empty())
+      return failure();
+  }
+
+  auto find_match =
+      [current, optstring]() -> cpp::optional<OptstringParser::value_type> {
+    for (auto i : OptstringParser{optstring})
+      if (i.c == current[0])
+        return i;
+    return {};
+  };
+
+  auto match = find_match();
+  if (!match) {
+    ctx.report_error("%s: illegal option -- %c\n", argv[0], current[0]);
+    ctx.optopt.get() = current[0];
+    return failure('?');
+  }
+
+  // We've matched so eat that character.
+  move_forward();
+  if (match->arg) {
+    // If we found an option that takes an argument and our current is not over,
+    // the rest of current is that argument. I.e., "-cabc" with optstring "c:",
+    // then optarg should point to "abc". Otherwise the argument to c will be in
+    // the next arg like "-c abc".
+    if (!current.empty()) {
+      // This const cast is fine because current was already holding a mutable
+      // string, it just doesn't have the semantics to note that, we could use
+      // span but it doesn't have string_view string niceties.
+      ctx.optarg.get() = const_cast<char *>(current.data());
+    } else {
+      // One char lookahead to see if we ran out of arguments. If so, return ':'
+      // if the first character of optstring is ':'. optind must stay at the
+      // current value so only increase it after we know there is another arg.
+      if (ctx.optind + 1 >= argc || !argv[ctx.optind + 1]) {
+        ctx.report_error("%s: option requires an argument -- %c\n", argv[0],
+                         match->c);
+        // POSIX: optopt identifies the option that caused the error for a
+        // missing option-argument as well as for an unknown option.
+        ctx.optopt.get() = match->c;
+        return failure(optstring[0] == ':' ? ':' : '?');
+      }
+      ctx.optarg.get() = argv[++ctx.optind.get()];
+    }
+    ctx.optind.get()++;
+    ctx.optpos.get() = 0;
+  } else if (current.empty()) {
+    // If this argument is now empty we are safe to move onto the next one.
+    ctx.optind.get()++;
+    ctx.optpos.get() = 0;
+  }
+
+  return match->c;
+}
+
+namespace impl {
+
+// The public getopt variables exposed through <unistd.h>.
+extern "C" char *optarg = nullptr;
+extern "C" int optind = 1;
+extern "C" int optopt = 0;
+// NOTE(review): POSIX describes diagnostics as printed unless the application
+// sets opterr to 0, which implies a nonzero initial value (glibc and musl use
+// 1). Starting at 0 silences diagnostics by default -- confirm this is
+// intended.
+extern "C" int opterr = 0;
+
+// Offset of the next option character inside argv[optind].
+static unsigned optpos;
+
+// True while `ctx` is wired to the public globals above; cleared once a test
+// installs its own state through set_getopt_state().
+static bool uses_public_state = true;
+
+// errstream starts out null and is resolved on first use in getopt(), so the
+// context does not depend on stderr already being set up while this
+// translation unit's statics are initialized.
+static GetoptContext ctx{&impl::optarg, &impl::optind, &impl::optopt, &optpos,
+                         impl::opterr,  /*errstream=*/nullptr};
+
+#ifndef LLVM_LIBC_PUBLIC_PACKAGING
+// This is used exclusively in tests. It redirects getopt() to caller-owned
+// variables and error stream; `opterr` is copied by value at this point.
+void set_getopt_state(char **optarg, int *optind, int *optopt, unsigned *optpos,
+                      int opterr, FILE *errstream) {
+  uses_public_state = false;
+  ctx = {optarg, optind, optopt, optpos, opterr, errstream};
+}
+#endif
+
+} // namespace impl
+
+// POSIX getopt(): parses the next option character from argv using the
+// process-wide state in impl::ctx. Not thread safe, as it mutates globals.
+LLVM_LIBC_FUNCTION(int, getopt,
+                   (int argc, char *const argv[], const char *optstring)) {
+  if (impl::uses_public_state) {
+    // ctx.opterr is only a snapshot; refresh it so that an application which
+    // assigns the public `opterr` actually changes the behaviour.
+    impl::ctx.opterr = impl::opterr;
+    if (!impl::ctx.errstream)
+      impl::ctx.errstream = reinterpret_cast<FILE *>(__llvm_libc::stderr);
+  }
+  return getopt_r(argc, argv, optstring, impl::ctx);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/unistd/getopt.h b/libc/src/unistd/getopt.h
new file mode 100644
index 0000000000000..bf5f8d040c944
--- /dev/null
+++ b/libc/src/unistd/getopt.h
@@ -0,0 +1,25 @@
+//===-- Implementation header for getopt ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_GETOPT_H
+#define LLVM_LIBC_SRC_UNISTD_GETOPT_H
+
+#include <stdio.h>
+#include <unistd.h>
+
+namespace __llvm_libc {
+
+namespace impl {
+void set_getopt_state(char **, int *, int *, unsigned *, int, FILE *);
+}
+
+int getopt(int argc, char *const argv[], const char *optstring);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UNISTD_GETOPT_H

diff  --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index 85b6f5c5e6798..b278c35eaa6cf 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -402,3 +402,16 @@ add_libc_unittest(
     libc.include.unistd
     libc.src.unistd.sysconf
 )
+
+add_libc_unittest(
+  getopt_test
+  SUITE
+    libc_unistd_unittests
+  SRCS
+    getopt_test.cpp
+  DEPENDS
+    libc.src.unistd.getopt
+    libc.src.__support.CPP.array
+    libc.src.stdio.fopencookie
+    libc.src.stdio.fflush
+)

diff  --git a/libc/test/src/unistd/getopt_test.cpp b/libc/test/src/unistd/getopt_test.cpp
new file mode 100644
index 0000000000000..d043e5881f6c7
--- /dev/null
+++ b/libc/test/src/unistd/getopt_test.cpp
@@ -0,0 +1,169 @@
+//===-- Unittests for getopt ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/getopt.h"
+#include "utils/UnitTest/Test.h"
+
+#include "src/__support/CPP/array.h"
+#include "src/stdio/fflush.h"
+#include "src/stdio/fopencookie.h"
+
+#include <stdio.h>
+
+using __llvm_libc::cpp::array;
+
+namespace test_globals {
+char *optarg;
+int optind = 1;
+int optopt;
+int opterr = 1;
+
+unsigned optpos;
+} // namespace test_globals
+
+// This can't be a constructor because it will get run before the constructor
+// which sets the default state in getopt.
+void set_state(FILE *errstream) {
+  __llvm_libc::impl::set_getopt_state(
+      &test_globals::optarg, &test_globals::optind, &test_globals::optopt,
+      &test_globals::optpos, test_globals::opterr, errstream);
+}
+
+// TODO: <stdio> could be either llvm-libc's or the system libc's. The former
+// doesn't currently support fmemopen but does have fopencookie. In the future
+// just use that instead. This memopen does no error checking for the size
+// of the buffer, etc.
+FILE *memopen(char **pos) {
+  static auto memcpy = [](char *dest, const char *src, size_t size) {
+    for (size_t i = 0; i < size; i++)
+      dest[i] = src[i];
+  };
+
+  static auto *write =
+      +[](void *cookie, const char *buf, size_t size) -> ssize_t {
+    char **pos = static_cast<char **>(cookie);
+    memcpy(*pos, buf, size);
+    *pos += size;
+    return size;
+  };
+
+  static cookie_io_functions_t cookie{nullptr, write, nullptr, nullptr};
+  return __llvm_libc::fopencookie(pos, "w", cookie);
+}
+
+// Fixture that points getopt at the test_globals variables and captures its
+// diagnostics in an in-memory buffer so tests can compare the exact text.
+struct LlvmLibcGetoptTest : public __llvm_libc::testing::Test {
+  FILE *errstream;
+  // Zero-initialized: the cookie writer copies raw bytes without appending a
+  // terminator, yet the buffer is read back as a C string.
+  char buf[256] = {};
+  // Write cursor handed to memopen(); advanced by every write.
+  char *pos = buf;
+
+  // Discards everything captured so far so that the next message starts at the
+  // beginning of an empty buffer.
+  void reset_errstream() {
+    // Push out anything still buffered first so it cannot land in the buffer
+    // after the reset.
+    __llvm_libc::fflush(errstream);
+    for (char &c : buf)
+      c = '\0';
+    pos = buf;
+  }
+
+  // Flushes pending output and returns the captured text.
+  const char *get_error_msg() {
+    __llvm_libc::fflush(errstream);
+    return buf;
+  }
+
+  void SetUp() override {
+    ASSERT_TRUE(!!(errstream = memopen(&pos)));
+    set_state(errstream);
+    ASSERT_EQ(test_globals::optind, 1);
+  }
+
+  // Restore the shared globals so that tests do not depend on execution order.
+  void TearDown() override {
+    test_globals::optind = 1;
+    test_globals::opterr = 1;
+    test_globals::optpos = 0;
+  }
+};
+
+// This is safe because getopt doesn't currently permute argv like GNU's getopt
+// does so this just helps silence warnings.
+char *operator"" _c(const char *c, size_t) { return const_cast<char *>(c); }
+
+TEST_F(LlvmLibcGetoptTest, NoMatch) {
+  array<char *, 3> argv{"prog"_c, "arg1"_c, nullptr};
+
+  // optind >= argc
+  EXPECT_EQ(__llvm_libc::getopt(1, argv.data(), "..."), -1);
+
+  // argv[optind] == nullptr
+  test_globals::optind = 2;
+  EXPECT_EQ(__llvm_libc::getopt(100, argv.data(), "..."), -1);
+
+  // argv[optind][0] != '-'
+  test_globals::optind = 1;
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+  ASSERT_EQ(test_globals::optind, 1);
+
+  // argv[optind] == "-"
+  argv[1] = "-"_c;
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+  ASSERT_EQ(test_globals::optind, 1);
+
+  // argv[optind] == "--", then return -1 and increment optind
+  argv[1] = "--"_c;
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+  EXPECT_EQ(test_globals::optind, 2);
+}
+
+TEST_F(LlvmLibcGetoptTest, WrongMatch) {
+  array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?'));
+  EXPECT_EQ(test_globals::optopt, (int)'b');
+  EXPECT_EQ(test_globals::optind, 1);
+  EXPECT_STREQ(get_error_msg(), "prog: illegal option -- b\n");
+}
+
+TEST_F(LlvmLibcGetoptTest, OpterrFalse) {
+  array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+  test_globals::opterr = 0;
+  set_state(errstream);
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?'));
+  EXPECT_EQ(test_globals::optopt, (int)'b');
+  EXPECT_EQ(test_globals::optind, 1);
+  EXPECT_STREQ(get_error_msg(), "");
+}
+
+TEST_F(LlvmLibcGetoptTest, MissingArg) {
+  array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), ":b:"), (int)':');
+  ASSERT_EQ(test_globals::optind, 1);
+  EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n");
+  reset_errstream();
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), int('?'));
+  EXPECT_EQ(test_globals::optind, 1);
+  EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n");
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseArgInCurrent) {
+  array<char *, 3> argv{"prog"_c, "-barg"_c, nullptr};
+
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), (int)'b');
+  EXPECT_STREQ(test_globals::optarg, "arg");
+  EXPECT_EQ(test_globals::optind, 2);
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseArgInNext) {
+  array<char *, 4> argv{"prog"_c, "-b"_c, "arg"_c, nullptr};
+
+  EXPECT_EQ(__llvm_libc::getopt(3, argv.data(), "b:"), (int)'b');
+  EXPECT_STREQ(test_globals::optarg, "arg");
+  EXPECT_EQ(test_globals::optind, 3);
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseMutliInOne) {
+  array<char *, 3> argv{"prog"_c, "-abc"_c, nullptr};
+
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'a');
+  ASSERT_EQ(test_globals::optind, 1);
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'b');
+  ASSERT_EQ(test_globals::optind, 1);
+  EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'c');
+  EXPECT_EQ(test_globals::optind, 2);
+}


        


More information about the libc-commits mailing list