[libc-commits] [libc] a678f86 - [libc] Implement getopt
Alex Brachet via libc-commits
libc-commits at lists.llvm.org
Wed Oct 26 23:24:02 PDT 2022
Author: Alex Brachet
Date: 2022-10-27T06:23:33Z
New Revision: a678f86351c30a7d57197ffefab4e6e44e61a857
URL: https://github.com/llvm/llvm-project/commit/a678f86351c30a7d57197ffefab4e6e44e61a857
DIFF: https://github.com/llvm/llvm-project/commit/a678f86351c30a7d57197ffefab4e6e44e61a857.diff
LOG: [libc] Implement getopt
Differential Revision: https://reviews.llvm.org/D133487
Added:
libc/include/llvm-libc-types/__getoptargv_t.h
libc/src/unistd/getopt.cpp
libc/src/unistd/getopt.h
libc/test/src/unistd/getopt_test.cpp
Modified:
libc/config/linux/aarch64/entrypoints.txt
libc/config/linux/api.td
libc/config/linux/x86_64/entrypoints.txt
libc/include/CMakeLists.txt
libc/include/llvm-libc-types/CMakeLists.txt
libc/spec/posix.td
libc/src/unistd/CMakeLists.txt
libc/test/src/unistd/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 51da5f3c899d6..5b2e609be3e74 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -432,6 +432,11 @@ if(LLVM_LIBC_FULL_BUILD)
# unistd.h entrypoints
libc.src.unistd.environ
libc.src.unistd.execv
+ libc.src.unistd.getopt
+ libc.src.unistd.optarg
+ libc.src.unistd.optind
+ libc.src.unistd.optopt
+ libc.src.unistd.opterr
# sys/select.h entrypoints
libc.src.sys.select.select
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 6b69e3ff85f4a..25e19e83fbcf9 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -268,7 +268,7 @@ def DirentAPI : PublicAPI<"dirent.h"> {
}
def UniStdAPI : PublicAPI<"unistd.h"> {
- let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", "uid_t"];
+ let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", "uid_t", "__getoptargv_t"];
}
def SysRandomAPI : PublicAPI<"sys/random.h"> {
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index b2b617c71aad7..669c7f0461ab7 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -455,6 +455,11 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.unistd.execv
libc.src.unistd.fork
libc.src.unistd.__llvm_libc_syscall
+ libc.src.unistd.getopt
+ libc.src.unistd.optarg
+ libc.src.unistd.optind
+ libc.src.unistd.optopt
+ libc.src.unistd.opterr
# sys/select.h entrypoints
libc.src.sys.select.select
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index cb0e885622bc7..3e63695e12a2a 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -185,6 +185,7 @@ add_gen_header(
.llvm-libc-types.size_t
.llvm-libc-types.ssize_t
.llvm-libc-types.uid_t
+ .llvm-libc-types.__getoptargv_t
)
add_gen_header(
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 1c667d5a55b7a..e4e9567d5fdc9 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -81,3 +81,4 @@ add_header(__atexithandler_t HDR __atexithandler_t.h)
add_header(speed_t HDR speed_t.h)
add_header(tcflag_t HDR tcflag_t.h)
add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
+add_header(__getoptargv_t HDR __getoptargv_t.h)
diff --git a/libc/include/llvm-libc-types/__getoptargv_t.h b/libc/include/llvm-libc-types/__getoptargv_t.h
new file mode 100644
index 0000000000000..81c67286c3a76
--- /dev/null
+++ b/libc/include/llvm-libc-types/__getoptargv_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __getoptargv_t ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_GETOPTARGV_T_H__
+#define __LLVM_LIBC_TYPES_GETOPTARGV_T_H__
+
+typedef char *const __getoptargv_t[];
+
+#endif // __LLVM_LIBC_TYPES_GETOPTARGV_T_H__
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index e54b48887bef4..43b3319059c98 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -74,6 +74,8 @@ def FdSet : NamedType<"fd_set">;
def FdSetPtr : PtrType<FdSet>;
def RestrictedFdSetPtr : RestrictedPtrType<FdSet>;
+def GetoptArgvT : NamedType<"__getoptargv_t">;
+
def POSIX : StandardSpec<"POSIX"> {
PtrType CharPtr = PtrType<CharType>;
RestrictedPtrType RestrictedCharPtr = RestrictedPtrType<CharType>;
@@ -348,6 +350,7 @@ def POSIX : StandardSpec<"POSIX"> {
SizeTType,
PidT,
UidT,
+ GetoptArgvT,
],
[], // Enumerations
[
@@ -581,9 +584,30 @@ def POSIX : StandardSpec<"POSIX"> {
RetValSpec<SSizeTType>,
[ArgSpec<IntType>, ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>]
>,
+ FunctionSpec<
+ "getopt",
+ RetValSpec<IntType>,
+ [ArgSpec<IntType>, ArgSpec<GetoptArgvT>, ArgSpec<ConstCharPtr>]
+ >,
],
[
ObjectSpec<"environ", "char **">,
+ ObjectSpec<
+ "optarg",
+ "char *"
+ >,
+ ObjectSpec<
+ "optind",
+ "int"
+ >,
+ ObjectSpec<
+ "opterr",
+ "int"
+ >,
+ ObjectSpec<
+ "optopt",
+ "int"
+ >,
]
>;
diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index cd4002e35d7a1..ab05175c25b77 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -254,3 +254,37 @@ add_entrypoint_object(
HDRS
environ.h
)
+
+add_entrypoint_object(
+ getopt
+ SRCS
+ getopt.cpp
+ HDRS
+ getopt.h
+ DEPENDS
+ libc.include.unistd
+ libc.src.__support.CPP.optional
+ libc.src.__support.CPP.string_view
+ libc.src.__support.File.file
+ libc.src.stdio.fprintf
+)
+
+# These aren't actually external per se, but this is just an easy way to create
+# targets that don't do anything. They exist to be referenced in entrypoints.txt
+# so that the header will properly expose their definitions. Their declaration
+# is in getopt.cpp.
+add_entrypoint_external(
+ optarg
+)
+
+add_entrypoint_external(
+ optind
+)
+
+add_entrypoint_external(
+ optopt
+)
+
+add_entrypoint_external(
+ opterr
+)
diff --git a/libc/src/unistd/getopt.cpp b/libc/src/unistd/getopt.cpp
new file mode 100644
index 0000000000000..fac326b0c33bb
--- /dev/null
+++ b/libc/src/unistd/getopt.cpp
@@ -0,0 +1,200 @@
+//===-- Implementation of getopt ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/getopt.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/File/file.h"
+#include "src/__support/common.h"
+#include "src/stdio/fprintf.h"
+
+#include <stdio.h>
+
+// This is POSIX compliant and does not support GNU extensions. The main one
+// omitted is the re-ordering of argv elements such that unknown arguments can
+// be easily iterated over.
+
+namespace __llvm_libc {
+
+template <typename T> struct RefWrapper {
+ RefWrapper(T *ptr) : ptr(ptr) {}
+ RefWrapper &operator=(const RefWrapper &) = default;
+ operator T &() { return *ptr; }
+ T &get() { return *ptr; }
+ T *ptr;
+};
+
+struct GetoptContext {
+ RefWrapper<char *> optarg;
+ RefWrapper<int> optind;
+ RefWrapper<int> optopt;
+ RefWrapper<unsigned> optpos;
+
+ int opterr;
+
+ FILE *errstream;
+
+ GetoptContext &operator=(const GetoptContext &) = default;
+
+ template <typename... Ts> void report_error(const char *fmt, Ts... ts) {
+ if (opterr)
+ __llvm_libc::fprintf(errstream, fmt, ts...);
+ }
+};
+
+struct OptstringParser {
+ using value_type = struct {
+ char c;
+ bool arg;
+ };
+
+ cpp::string_view optstring;
+
+ struct iterator {
+ cpp::string_view curr;
+
+ iterator operator++() {
+ curr = curr.substr(1);
+ return *this;
+ }
+
+ bool operator!=(iterator other) { return curr.data() != other.curr.data(); }
+
+ value_type operator*() {
+ value_type r{curr.front(), false};
+ if (!curr.substr(1).empty() && curr.substr(1).front() == ':') {
+ this->operator++();
+ r.arg = true;
+ }
+ return r;
+ }
+ };
+
+ iterator begin() {
+ bool skip = optstring.front() == '-' || optstring.front() == '+' ||
+ optstring.front() == ':';
+ return {optstring.substr(!!skip)};
+ }
+
+ iterator end() { return {optstring.substr(optstring.size())}; }
+};
+
+int getopt_r(int argc, char *const argv[], const char *optstring,
+ GetoptContext &ctx) {
+ auto failure = [&ctx](int ret = -1) {
+ ctx.optpos.get() = 0;
+ return ret;
+ };
+
+ if (ctx.optind >= argc || !argv[ctx.optind])
+ return failure();
+
+ cpp::string_view current =
+ cpp::string_view{argv[ctx.optind]}.substr(ctx.optpos);
+
+  auto move_forward = [&current, &ctx] {
+ current = current.substr(1);
+ ctx.optpos.get()++;
+ };
+
+ // If optpos is nonzero, then we are already parsing a valid flag and these
+ // need not be checked.
+ if (ctx.optpos == 0) {
+ if (current[0] != '-')
+ return failure();
+
+ if (current == "--") {
+ ctx.optind.get()++;
+ return failure();
+ }
+
+ // Eat the '-' char.
+ move_forward();
+ if (current.empty())
+ return failure();
+ }
+
+ auto find_match =
+ [current, optstring]() -> cpp::optional<OptstringParser::value_type> {
+ for (auto i : OptstringParser{optstring})
+ if (i.c == current[0])
+ return i;
+ return {};
+ };
+
+ auto match = find_match();
+ if (!match) {
+ ctx.report_error("%s: illegal option -- %c\n", argv[0], current[0]);
+ ctx.optopt.get() = current[0];
+ return failure('?');
+ }
+
+ // We've matched so eat that character.
+ move_forward();
+ if (match->arg) {
+ // If we found an option that takes an argument and our current is not over,
+    // the rest of current is that argument. I.e., "-cabc" with optstring "c:",
+ // then optarg should point to "abc". Otherwise the argument to c will be in
+ // the next arg like "-c abc".
+ if (!current.empty()) {
+ // This const cast is fine because current was already holding a mutable
+ // string, it just doesn't have the semantics to note that, we could use
+ // span but it doesn't have string_view string niceties.
+ ctx.optarg.get() = const_cast<char *>(current.data());
+ } else {
+ // One char lookahead to see if we ran out of arguments. If so, return ':'
+ // if the first character of optstring is ':'. optind must stay at the
+      // current value so only increase it after we know there is another arg.
+ if (ctx.optind + 1 >= argc || !argv[ctx.optind + 1]) {
+ ctx.report_error("%s: option requires an argument -- %c\n", argv[0],
+ match->c);
+ return failure(optstring[0] == ':' ? ':' : '?');
+ }
+ ctx.optarg.get() = argv[++ctx.optind];
+ }
+ ctx.optind++;
+ ctx.optpos.get() = 0;
+ } else if (current.empty()) {
+ // If this argument is now empty we are safe to move onto the next one.
+ ctx.optind++;
+ ctx.optpos.get() = 0;
+ }
+
+ return match->c;
+}
+
+namespace impl {
+
+extern "C" char *optarg;
+extern "C" int optind = 1;
+extern "C" int optopt;
+extern "C" int opterr;
+
+static unsigned optpos;
+
+static GetoptContext ctx{
+ &impl::optarg, &impl::optind,
+ &impl::optopt, &optpos,
+ impl::opterr, reinterpret_cast<FILE *>(__llvm_libc::stderr)};
+
+#ifndef LLVM_LIBC_PUBLIC_PACKAGING
+// This is used exclusively in tests.
+void set_getopt_state(char **optarg, int *optind, int *optopt, unsigned *optpos,
+ int opterr, FILE *errstream) {
+ ctx = {optarg, optind, optopt, optpos, opterr, errstream};
+}
+#endif
+
+} // namespace impl
+
+LLVM_LIBC_FUNCTION(int, getopt,
+ (int argc, char *const argv[], const char *optstring)) {
+ return getopt_r(argc, argv, optstring, impl::ctx);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/unistd/getopt.h b/libc/src/unistd/getopt.h
new file mode 100644
index 0000000000000..bf5f8d040c944
--- /dev/null
+++ b/libc/src/unistd/getopt.h
@@ -0,0 +1,25 @@
+//===-- Implementation header for getopt ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_GETOPT_H
+#define LLVM_LIBC_SRC_UNISTD_GETOPT_H
+
+#include <stdio.h>
+#include <unistd.h>
+
+namespace __llvm_libc {
+
+namespace impl {
+void set_getopt_state(char **, int *, int *, unsigned *, int, FILE *);
+}
+
+int getopt(int argc, char *const argv[], const char *optstring);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UNISTD_GETOPT_H
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index 85b6f5c5e6798..b278c35eaa6cf 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -402,3 +402,16 @@ add_libc_unittest(
libc.include.unistd
libc.src.unistd.sysconf
)
+
+add_libc_unittest(
+ getopt_test
+ SUITE
+ libc_unistd_unittests
+ SRCS
+ getopt_test.cpp
+ DEPENDS
+ libc.src.unistd.getopt
+ libc.src.__support.CPP.array
+ libc.src.stdio.fopencookie
+ libc.src.stdio.fflush
+)
diff --git a/libc/test/src/unistd/getopt_test.cpp b/libc/test/src/unistd/getopt_test.cpp
new file mode 100644
index 0000000000000..d043e5881f6c7
--- /dev/null
+++ b/libc/test/src/unistd/getopt_test.cpp
@@ -0,0 +1,169 @@
+//===-- Unittests for getopt ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/getopt.h"
+#include "utils/UnitTest/Test.h"
+
+#include "src/__support/CPP/array.h"
+#include "src/stdio/fflush.h"
+#include "src/stdio/fopencookie.h"
+
+#include <stdio.h>
+
+using __llvm_libc::cpp::array;
+
+namespace test_globals {
+char *optarg;
+int optind = 1;
+int optopt;
+int opterr = 1;
+
+unsigned optpos;
+} // namespace test_globals
+
+// This can't be a constructor because it will get run before the constructor
+// which sets the default state in getopt.
+void set_state(FILE *errstream) {
+ __llvm_libc::impl::set_getopt_state(
+ &test_globals::optarg, &test_globals::optind, &test_globals::optopt,
+ &test_globals::optpos, test_globals::opterr, errstream);
+}
+
+// TODO: <stdio> could be either llvm-libc's or the system libc's. The former
+// doesn't currently support fmemopen but does have fopencookie. In the future
+// just use fmemopen instead. This memopen does no error checking for the size
+// of the buffer, etc.
+FILE *memopen(char **pos) {
+ static auto memcpy = [](char *dest, const char *src, size_t size) {
+ for (size_t i = 0; i < size; i++)
+ dest[i] = src[i];
+ };
+
+ static auto *write =
+ +[](void *cookie, const char *buf, size_t size) -> ssize_t {
+ char **pos = static_cast<char **>(cookie);
+ memcpy(*pos, buf, size);
+ *pos += size;
+ return size;
+ };
+
+ static cookie_io_functions_t cookie{nullptr, write, nullptr, nullptr};
+ return __llvm_libc::fopencookie(pos, "w", cookie);
+}
+
+struct LlvmLibcGetoptTest : public __llvm_libc::testing::Test {
+ FILE *errstream;
+ char buf[256];
+ char *pos = buf;
+
+ void reset_errstream() { pos = buf; }
+ const char *get_error_msg() {
+ __llvm_libc::fflush(errstream);
+ return buf;
+ }
+
+ void SetUp() override {
+ ASSERT_TRUE(!!(errstream = memopen(&pos)));
+ set_state(errstream);
+ ASSERT_EQ(test_globals::optind, 1);
+ }
+
+ void TearDown() override {
+ test_globals::optind = 1;
+ test_globals::opterr = 1;
+ }
+};
+
+// This is safe because getopt doesn't currently permute argv like GNU's getopt
+// does so this just helps silence warnings.
+char *operator"" _c(const char *c, size_t) { return const_cast<char *>(c); }
+
+TEST_F(LlvmLibcGetoptTest, NoMatch) {
+ array<char *, 3> argv{"prog"_c, "arg1"_c, nullptr};
+
+ // optind >= argc
+ EXPECT_EQ(__llvm_libc::getopt(1, argv.data(), "..."), -1);
+
+ // argv[optind] == nullptr
+ test_globals::optind = 2;
+ EXPECT_EQ(__llvm_libc::getopt(100, argv.data(), "..."), -1);
+
+ // argv[optind][0] != '-'
+ test_globals::optind = 1;
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+ ASSERT_EQ(test_globals::optind, 1);
+
+ // argv[optind] == "-"
+ argv[1] = "-"_c;
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+ ASSERT_EQ(test_globals::optind, 1);
+
+  // argv[optind] == "--", then return -1 and increment optind
+ argv[1] = "--"_c;
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1);
+ EXPECT_EQ(test_globals::optind, 2);
+}
+
+TEST_F(LlvmLibcGetoptTest, WrongMatch) {
+ array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?'));
+ EXPECT_EQ(test_globals::optopt, (int)'b');
+ EXPECT_EQ(test_globals::optind, 1);
+ EXPECT_STREQ(get_error_msg(), "prog: illegal option -- b\n");
+}
+
+TEST_F(LlvmLibcGetoptTest, OpterrFalse) {
+ array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+ test_globals::opterr = 0;
+ set_state(errstream);
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?'));
+ EXPECT_EQ(test_globals::optopt, (int)'b');
+ EXPECT_EQ(test_globals::optind, 1);
+ EXPECT_STREQ(get_error_msg(), "");
+}
+
+TEST_F(LlvmLibcGetoptTest, MissingArg) {
+ array<char *, 3> argv{"prog"_c, "-b"_c, nullptr};
+
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), ":b:"), (int)':');
+ ASSERT_EQ(test_globals::optind, 1);
+ EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n");
+ reset_errstream();
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), int('?'));
+ EXPECT_EQ(test_globals::optind, 1);
+ EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n");
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseArgInCurrent) {
+ array<char *, 3> argv{"prog"_c, "-barg"_c, nullptr};
+
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), (int)'b');
+ EXPECT_STREQ(test_globals::optarg, "arg");
+ EXPECT_EQ(test_globals::optind, 2);
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseArgInNext) {
+ array<char *, 4> argv{"prog"_c, "-b"_c, "arg"_c, nullptr};
+
+ EXPECT_EQ(__llvm_libc::getopt(3, argv.data(), "b:"), (int)'b');
+ EXPECT_STREQ(test_globals::optarg, "arg");
+ EXPECT_EQ(test_globals::optind, 3);
+}
+
+TEST_F(LlvmLibcGetoptTest, ParseMutliInOne) {
+ array<char *, 3> argv{"prog"_c, "-abc"_c, nullptr};
+
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'a');
+ ASSERT_EQ(test_globals::optind, 1);
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'b');
+ ASSERT_EQ(test_globals::optind, 1);
+ EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'c');
+ EXPECT_EQ(test_globals::optind, 2);
+}
More information about the libc-commits
mailing list