[libc-commits] [libc] 1ceafe5 - [libc] Add implementation of ungetc.
Siva Chandra Reddy via libc-commits
libc-commits at lists.llvm.org
Wed Nov 2 15:46:15 PDT 2022
Author: Siva Chandra Reddy
Date: 2022-11-02T22:45:57Z
New Revision: 1ceafe5e0f694797dab3b44a93ac8b098739d47f
URL: https://github.com/llvm/llvm-project/commit/1ceafe5e0f694797dab3b44a93ac8b098739d47f
DIFF: https://github.com/llvm/llvm-project/commit/1ceafe5e0f694797dab3b44a93ac8b098739d47f.diff
LOG: [libc] Add implementation of ungetc.
A bug in the file read logic has also been fixed along the way. Parts
of the ungetc tests will fail without that bug fixed.
Reviewed By: michaelrj
Differential Revision: https://reviews.llvm.org/D137286
Added:
libc/src/stdio/ungetc.cpp
libc/src/stdio/ungetc.h
libc/test/src/stdio/ungetc_test.cpp
Modified:
libc/config/linux/x86_64/entrypoints.txt
libc/spec/stdc.td
libc/src/__support/File/file.cpp
libc/src/__support/File/file.h
libc/src/stdio/CMakeLists.txt
libc/test/src/stdio/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7b0fb53451004..17f2c994c12fb 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -396,6 +396,7 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdio.stderr
libc.src.stdio.stdin
libc.src.stdio.stdout
+ libc.src.stdio.ungetc
# stdlib.h entrypoints
libc.src.stdlib._Exit
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index a7a9df46747f4..69a3ac1e26d8b 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -641,6 +641,11 @@ def StdC : StandardSpec<"stdc"> {
ArgSpec<ConstCharRestrictedPtr>,
ArgSpec<VarArgType>]
>,
+ FunctionSpec<
+ "ungetc",
+ RetValSpec<IntType>,
+ [ArgSpec<IntType>, ArgSpec<FILEPtr>]
+ >,
],
[
ObjectSpec<
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index b5d00b7a876da..352b1a4d24005 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -203,10 +203,14 @@ size_t File::read_unlocked(void *data, size_t len) {
for (size_t i = 0; i < available_data; ++i)
dataref[i] = bufref[i + pos];
read_limit = pos = 0; // Reset the pointers.
+ // Update the dataref to reflect that fact that we have already
+ // copied |available_data| into |data|.
+ dataref = cpp::span<uint8_t>(dataref.data() + available_data,
+ dataref.size() - available_data);
size_t to_fetch = len - available_data;
if (to_fetch > bufsize) {
- size_t fetched_size = platform_read(this, data, to_fetch);
+ size_t fetched_size = platform_read(this, dataref.data(), to_fetch);
if (fetched_size < to_fetch) {
if (errno == 0)
eof = true;
@@ -233,6 +237,44 @@ size_t File::read_unlocked(void *data, size_t len) {
return transfer_size + available_data;
}
+int File::ungetc_unlocked(int c) {
+ // There is no meaning to unget if:
+ // 1. You are trying to push back EOF.
+ // 2. Read operations are not allowed on this file.
+ // 3. The previous operation was a write operation.
+ if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE))
+ return EOF;
+
+ cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
+ if (read_limit == 0) {
+ // If |read_limit| is zero, it can mean three things:
+ // a. This file was just created.
+ // b. The previous operation was a seek operation.
+ // c. The previous operation was a read operation which emptied
+ // the buffer.
+ // For all the above cases, we simply write |c| at the beginning
+ // of the buffer and bump |read_limit|. Note that |pos| will also
+ // be zero in this case, so we don't need to adjust it.
+ bufref[0] = static_cast<unsigned char>(c);
+ ++read_limit;
+ } else {
+ // If |read_limit| is non-zero, it means that there is data in the buffer
+ // from a previous read operation. Which would also mean that |pos| is not
+ // zero. So, we decrement |pos| and write |c| in to the buffer at the new
+ // |pos|. If too many ungetc operations are performed without reads, it
+ // can lead to (pos == 0 but read_limit != 0). We will just error out in
+ // such a case.
+ if (pos == 0)
+ return EOF;
+ --pos;
+ bufref[pos] = static_cast<unsigned char>(c);
+ }
+
+ eof = false; // There is atleast one character that can be read now.
+ err = false; // This operation was a success.
+ return c;
+}
+
int File::seek(long offset, int whence) {
FileLock lock(this);
if (prev_op == FileOp::WRITE && pos > 0) {
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 74655b1301b85..7ea780d94f555 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -187,6 +187,14 @@ class File {
int flush_unlocked();
+ // Returns EOF on error and keeps the file unchanged.
+ int ungetc_unlocked(int c);
+
+ int ungetc(int c) {
+ FileLock lock(this);
+ return ungetc_unlocked(c);
+ }
+
// Sets the internal buffer to |buffer| with buffering mode |mode|.
// |size| is the size of |buffer|. This new |buffer| is owned by the
// stream only if |owned| is true.
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index 22536a515bd58..f8b197d984c52 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -282,6 +282,18 @@ add_entrypoint_object(
libc.src.__support.File.platform_file
)
+add_entrypoint_object(
+ ungetc
+ SRCS
+ ungetc.cpp
+ HDRS
+ ungetc.h
+ DEPENDS
+ libc.include.stdio
+ libc.src.__support.File.file
+ libc.src.__support.File.platform_file
+)
+
add_entrypoint_object(
fopencookie
SRCS
diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/ungetc.cpp
new file mode 100644
index 0000000000000..de6ce0ba0683d
--- /dev/null
+++ b/libc/src/stdio/ungetc.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of ungetc ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/ungetc.h"
+#include "src/__support/File/file.h"
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) {
+ return reinterpret_cast<__llvm_libc::File *>(stream)->ungetc(c);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/ungetc.h b/libc/src/stdio/ungetc.h
new file mode 100644
index 0000000000000..b5b7acb5962c1
--- /dev/null
+++ b/libc/src/stdio/ungetc.h
@@ -0,0 +1,20 @@
+//===-- Implementation header of ungetc -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_UNGETC_H
+#define LLVM_LIBC_SRC_STDIO_UNGETC_H
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+int ungetc(int c, ::FILE *stream);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_UNGETC_H
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
index 515619e2aa822..904c669d63da6 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -21,6 +21,22 @@ add_libc_unittest(
libc.src.stdio.fwrite
)
+add_libc_unittest(
+ ungetc_test
+ SUITE
+ libc_stdio_unittests
+ SRCS
+ ungetc_test.cpp
+ DEPENDS
+ libc.include.stdio
+ libc.src.stdio.fclose
+ libc.src.stdio.fopen
+ libc.src.stdio.fread
+ libc.src.stdio.fseek
+ libc.src.stdio.fwrite
+ libc.src.stdio.ungetc
+)
+
add_libc_unittest(
unlocked_fileop_test
SUITE
diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp
new file mode 100644
index 0000000000000..0102be7b2e0fb
--- /dev/null
+++ b/libc/test/src/stdio/ungetc_test.cpp
@@ -0,0 +1,59 @@
+//===-- Unittests for ungetc ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/fclose.h"
+#include "src/stdio/fopen.h"
+#include "src/stdio/fread.h"
+#include "src/stdio/fseek.h"
+#include "src/stdio/fwrite.h"
+#include "src/stdio/ungetc.h"
+#include "utils/UnitTest/Test.h"
+
+#include <stdio.h>
+
+TEST(LlvmLibcUngetcTest, UngetAndReadBack) {
+ constexpr char FILENAME[] = "testdata/ungetc_test.test";
+ ::FILE *file = __llvm_libc::fopen(FILENAME, "w");
+ ASSERT_FALSE(file == nullptr);
+ constexpr char CONTENT[] = "abcdef";
+ constexpr size_t CONTENT_SIZE = sizeof(CONTENT);
+ ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file));
+ // Cannot unget to an un-readable file.
+ ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file));
+ ASSERT_EQ(0, __llvm_libc::fclose(file));
+
+ file = __llvm_libc::fopen(FILENAME, "r+");
+ ASSERT_FALSE(file == nullptr);
+ char c;
+ ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1));
+ ASSERT_EQ(c, CONTENT[0]);
+ ASSERT_EQ(__llvm_libc::ungetc(int(c), file), int(c));
+
+ char data[CONTENT_SIZE];
+ ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, file));
+ ASSERT_STREQ(CONTENT, data);
+
+ ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET));
+ // ungetc should not fail after a seek operation.
+ int unget_char = 'z';
+ ASSERT_EQ(unget_char, __llvm_libc::ungetc(unget_char, file));
+ // Another unget should fail.
+ ASSERT_EQ(EOF, __llvm_libc::ungetc(unget_char, file));
+ // ungetting a char at the beginning of the file will allow us to fetch
+ // one additional character.
+ char new_data[CONTENT_SIZE + 1];
+ ASSERT_EQ(CONTENT_SIZE + 1,
+ __llvm_libc::fread(new_data, 1, CONTENT_SIZE + 1, file));
+ ASSERT_STREQ("zabcdef", new_data);
+
+ ASSERT_EQ(size_t(1), __llvm_libc::fwrite("x", 1, 1, file));
+ // unget should fail after a write operation.
+ ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file));
+
+ ASSERT_EQ(0, __llvm_libc::fclose(file));
+}
More information about the libc-commits
mailing list