[libc-commits] [libc] 1ceafe5 - [libc] Add implementation of ungetc.

Siva Chandra Reddy via libc-commits libc-commits at lists.llvm.org
Wed Nov 2 15:46:15 PDT 2022


Author: Siva Chandra Reddy
Date: 2022-11-02T22:45:57Z
New Revision: 1ceafe5e0f694797dab3b44a93ac8b098739d47f

URL: https://github.com/llvm/llvm-project/commit/1ceafe5e0f694797dab3b44a93ac8b098739d47f
DIFF: https://github.com/llvm/llvm-project/commit/1ceafe5e0f694797dab3b44a93ac8b098739d47f.diff

LOG: [libc] Add implementation of ungetc.

A bug in the file read logic has also been fixed along the way. Parts
of the ungetc tests will fail without that bug fixed.

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D137286

Added: 
    libc/src/stdio/ungetc.cpp
    libc/src/stdio/ungetc.h
    libc/test/src/stdio/ungetc_test.cpp

Modified: 
    libc/config/linux/x86_64/entrypoints.txt
    libc/spec/stdc.td
    libc/src/__support/File/file.cpp
    libc/src/__support/File/file.h
    libc/src/stdio/CMakeLists.txt
    libc/test/src/stdio/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7b0fb53451004..17f2c994c12fb 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -396,6 +396,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdio.stderr
     libc.src.stdio.stdin
     libc.src.stdio.stdout
+    libc.src.stdio.ungetc
 
     # stdlib.h entrypoints
     libc.src.stdlib._Exit

diff  --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index a7a9df46747f4..69a3ac1e26d8b 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -641,6 +641,11 @@ def StdC : StandardSpec<"stdc"> {
                ArgSpec<ConstCharRestrictedPtr>,
                ArgSpec<VarArgType>]
           >,
+          FunctionSpec<
+              "ungetc",
+              RetValSpec<IntType>,
+              [ArgSpec<IntType>, ArgSpec<FILEPtr>]
+          >,
       ],
       [
           ObjectSpec<

diff  --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index b5d00b7a876da..352b1a4d24005 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -203,10 +203,14 @@ size_t File::read_unlocked(void *data, size_t len) {
   for (size_t i = 0; i < available_data; ++i)
     dataref[i] = bufref[i + pos];
   read_limit = pos = 0; // Reset the pointers.
+  // Update the dataref to reflect that fact that we have already
+  // copied |available_data| into |data|.
+  dataref = cpp::span<uint8_t>(dataref.data() + available_data,
+                               dataref.size() - available_data);
 
   size_t to_fetch = len - available_data;
   if (to_fetch > bufsize) {
-    size_t fetched_size = platform_read(this, data, to_fetch);
+    size_t fetched_size = platform_read(this, dataref.data(), to_fetch);
     if (fetched_size < to_fetch) {
       if (errno == 0)
         eof = true;
@@ -233,6 +237,44 @@ size_t File::read_unlocked(void *data, size_t len) {
   return transfer_size + available_data;
 }
 
+int File::ungetc_unlocked(int c) {
+  // There is no meaning to unget if:
+  // 1. You are trying to push back EOF.
+  // 2. Read operations are not allowed on this file.
+  // 3. The previous operation was a write operation.
+  if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE))
+    return EOF;
+
+  cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize);
+  if (read_limit == 0) {
+    // If |read_limit| is zero, it can mean three things:
+    //   a. This file was just created.
+    //   b. The previous operation was a seek operation.
+    //   c. The previous operation was a read operation which emptied
+    //      the buffer.
+    // For all the above cases, we simply write |c| at the beginning
+    // of the buffer and bump |read_limit|. Note that |pos| will also
+    // be zero in this case, so we don't need to adjust it.
+    bufref[0] = static_cast<unsigned char>(c);
+    ++read_limit;
+  } else {
+    // If |read_limit| is non-zero, it means that there is data in the buffer
+    // from a previous read operation. Which would also mean that |pos| is not
+    // zero. So, we decrement |pos| and write |c| in to the buffer at the new
+    // |pos|. If too many ungetc operations are performed without reads, it
+    // can lead to (pos == 0 but read_limit != 0). We will just error out in
+    // such a case.
+    if (pos == 0)
+      return EOF;
+    --pos;
+    bufref[pos] = static_cast<unsigned char>(c);
+  }
+
+  eof = false; // There is atleast one character that can be read now.
+  err = false; // This operation was a success.
+  return c;
+}
+
 int File::seek(long offset, int whence) {
   FileLock lock(this);
   if (prev_op == FileOp::WRITE && pos > 0) {

diff  --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 74655b1301b85..7ea780d94f555 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -187,6 +187,14 @@ class File {
 
   int flush_unlocked();
 
+  // Returns EOF on error and keeps the file unchanged.
+  int ungetc_unlocked(int c);
+
+  int ungetc(int c) {
+    FileLock lock(this);
+    return ungetc_unlocked(c);
+  }
+
   // Sets the internal buffer to |buffer| with buffering mode |mode|.
   // |size| is the size of |buffer|. This new |buffer| is owned by the
   // stream only if |owned| is true.

diff  --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index 22536a515bd58..f8b197d984c52 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -282,6 +282,18 @@ add_entrypoint_object(
     libc.src.__support.File.platform_file
 )
 
+add_entrypoint_object(
+  ungetc
+  SRCS
+    ungetc.cpp
+  HDRS
+    ungetc.h
+  DEPENDS
+    libc.include.stdio
+    libc.src.__support.File.file
+    libc.src.__support.File.platform_file
+)
+
 add_entrypoint_object(
   fopencookie
   SRCS

diff  --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/ungetc.cpp
new file mode 100644
index 0000000000000..de6ce0ba0683d
--- /dev/null
+++ b/libc/src/stdio/ungetc.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of ungetc ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/ungetc.h"
+#include "src/__support/File/file.h"
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) {
+  return reinterpret_cast<__llvm_libc::File *>(stream)->ungetc(c);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/stdio/ungetc.h b/libc/src/stdio/ungetc.h
new file mode 100644
index 0000000000000..b5b7acb5962c1
--- /dev/null
+++ b/libc/src/stdio/ungetc.h
@@ -0,0 +1,20 @@
+//===-- Implementation header of ungetc -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_UNGETC_H
+#define LLVM_LIBC_SRC_STDIO_UNGETC_H
+
+#include <stdio.h>
+
+namespace __llvm_libc {
+
+int ungetc(int c, ::FILE *stream);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_UNGETC_H

diff  --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
index 515619e2aa822..904c669d63da6 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -21,6 +21,22 @@ add_libc_unittest(
     libc.src.stdio.fwrite
 )
 
+add_libc_unittest(
+  ungetc_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    ungetc_test.cpp
+  DEPENDS
+    libc.include.stdio
+    libc.src.stdio.fclose
+    libc.src.stdio.fopen
+    libc.src.stdio.fread
+    libc.src.stdio.fseek
+    libc.src.stdio.fwrite
+    libc.src.stdio.ungetc
+)
+
 add_libc_unittest(
   unlocked_fileop_test
   SUITE

diff  --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp
new file mode 100644
index 0000000000000..0102be7b2e0fb
--- /dev/null
+++ b/libc/test/src/stdio/ungetc_test.cpp
@@ -0,0 +1,59 @@
+//===-- Unittests for ungetc ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/fclose.h"
+#include "src/stdio/fopen.h"
+#include "src/stdio/fread.h"
+#include "src/stdio/fseek.h"
+#include "src/stdio/fwrite.h"
+#include "src/stdio/ungetc.h"
+#include "utils/UnitTest/Test.h"
+
+#include <stdio.h>
+
+TEST(LlvmLibcUngetcTest, UngetAndReadBack) {
+  constexpr char FILENAME[] = "testdata/ungetc_test.test";
+  ::FILE *file = __llvm_libc::fopen(FILENAME, "w");
+  ASSERT_FALSE(file == nullptr);
+  constexpr char CONTENT[] = "abcdef";
+  constexpr size_t CONTENT_SIZE = sizeof(CONTENT);
+  ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file));
+  // Cannot unget to an un-readable file.
+  ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file));
+  ASSERT_EQ(0, __llvm_libc::fclose(file));
+
+  file = __llvm_libc::fopen(FILENAME, "r+");
+  ASSERT_FALSE(file == nullptr);
+  char c;
+  ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1));
+  ASSERT_EQ(c, CONTENT[0]);
+  ASSERT_EQ(__llvm_libc::ungetc(int(c), file), int(c));
+
+  char data[CONTENT_SIZE];
+  ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, file));
+  ASSERT_STREQ(CONTENT, data);
+
+  ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET));
+  // ungetc should not fail after a seek operation.
+  int unget_char = 'z';
+  ASSERT_EQ(unget_char, __llvm_libc::ungetc(unget_char, file));
+  // Another unget should fail.
+  ASSERT_EQ(EOF, __llvm_libc::ungetc(unget_char, file));
+  // ungetting a char at the beginning of the file will allow us to fetch
+  // one additional character.
+  char new_data[CONTENT_SIZE + 1];
+  ASSERT_EQ(CONTENT_SIZE + 1,
+            __llvm_libc::fread(new_data, 1, CONTENT_SIZE + 1, file));
+  ASSERT_STREQ("zabcdef", new_data);
+
+  ASSERT_EQ(size_t(1), __llvm_libc::fwrite("x", 1, 1, file));
+  // unget should fail after a write operation.
+  ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file));
+
+  ASSERT_EQ(0, __llvm_libc::fclose(file));
+}


        


More information about the libc-commits mailing list