[libc-commits] [libc] [libc]: Clean up unnecessary function pointers in scanf (PR #121215)

Vinay Deshmukh via libc-commits libc-commits at lists.llvm.org
Sat Dec 28 06:50:13 PST 2024


https://github.com/vinay-deshmukh updated https://github.com/llvm/llvm-project/pull/121215

>From 885d874655d9fb58a74b6c3ecec262278952953d Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Mon, 23 Dec 2024 13:20:20 +0530
Subject: [PATCH 1/6] Initial moves

---
 libc/src/stdio/scanf_core/reader.cpp         | 61 +++++++++++++++++++-
 libc/src/stdio/scanf_core/reader.h           | 26 +--------
 libc/src/stdio/scanf_core/vfscanf_internal.h |  2 +-
 3 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index ec1f5c098dc7a6..cc09f47709d6b8 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -10,9 +10,67 @@
 #include "src/__support/macros/config.h"
 #include <stddef.h>
 
+#include "src/__support/File/file.h"
+
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {
 
+namespace internal {
+
+#if defined(LIBC_TARGET_ARCH_IS_GPU)
+// The GPU build provides FILE access through the host operating system's
+// library. So here we simply use the public entrypoints like in the SYSTEM_FILE
+// interface. Entrypoints should normally not call others, this is an exception.
+// FIXME: We do not acquire any locks here, so this is not thread safe.
+LIBC_INLINE int getc(void *f) {
+  return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+
+#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+LIBC_INLINE int getc(void *f) {
+  unsigned char c;
+  auto result =
+      reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
+  size_t r = result.value;
+  if (result.has_error() || r != 1)
+    return '\0';
+
+  return c;
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
+}
+
+#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+
+// Since ungetc_unlocked isn't always available, we don't acquire the lock for
+// system files.
+LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  ::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
+
+} // namespace internal
+
+char Reader::getc() {
+  ++cur_chars_read;
+  if (rb != nullptr) {
+    char output = rb->buffer[rb->buff_cur];
+    ++(rb->buff_cur);
+    return output;
+  }
+  // This should reset the buffer if applicable.
+  return static_cast<char>(internal::getc(input_stream));
+}
+
 void Reader::ungetc(char c) {
   --cur_chars_read;
   if (rb != nullptr && rb->buff_cur > 0) {
@@ -23,7 +81,8 @@ void Reader::ungetc(char c) {
     --(rb->buff_cur);
     return;
   }
-  stream_ungetc(static_cast<int>(c), input_stream);
+  internal::ungetc(static_cast<int>(c), input_stream);
 }
+
 } // namespace scanf_core
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index f984fd93789109..e29ea6ea7d8917 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -16,9 +16,6 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {
 
-using StreamGetc = int (*)(void *);
-using StreamUngetc = void (*)(int, void *);
-
 // This is intended to be either a raw string or a buffer syncronized with the
 // file's internal buffer.
 struct ReadBuffer {
@@ -29,38 +26,21 @@ struct ReadBuffer {
 
 class Reader {
   ReadBuffer *rb;
-
   void *input_stream = nullptr;
-
-  // TODO: Remove these unnecessary function pointers
-  StreamGetc stream_getc = nullptr;
-  StreamUngetc stream_ungetc = nullptr;
-
   size_t cur_chars_read = 0;
 
 public:
   // TODO: Set buff_len with a proper constant
   LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
 
-  LIBC_INLINE Reader(void *stream, StreamGetc stream_getc_in,
-                     StreamUngetc stream_ungetc_in,
+  LIBC_INLINE Reader(void *stream,
                      ReadBuffer *stream_buffer = nullptr)
-      : rb(stream_buffer), input_stream(stream), stream_getc(stream_getc_in),
-        stream_ungetc(stream_ungetc_in) {}
+      : rb(stream_buffer), input_stream(stream){}
 
   // This returns the next character from the input and advances it by one
   // character. When it hits the end of the string or file it returns '\0' to
   // signal to stop parsing.
-  LIBC_INLINE char getc() {
-    ++cur_chars_read;
-    if (rb != nullptr) {
-      char output = rb->buffer[rb->buff_cur];
-      ++(rb->buff_cur);
-      return output;
-    }
-    // This should reset the buffer if applicable.
-    return static_cast<char>(stream_getc(input_stream));
-  }
+  char getc(); 
 
   // This moves the input back by one character, placing c into the buffer if
   // this is a file reader, else c is ignored.
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 67126431fcded5..c96c672bc6fac5 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -103,7 +103,7 @@ LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
                                  const char *__restrict format,
                                  internal::ArgList &args) {
   internal::flockfile(stream);
-  scanf_core::Reader reader(stream, &internal::getc, internal::ungetc);
+  scanf_core::Reader reader(stream);
   int retval = scanf_core::scanf_main(&reader, format, args);
   if (retval == 0 && internal::ferror_unlocked(stream))
     retval = EOF;

>From 12f2ae071b882e25d47b635579583dbcf3886e22 Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Fri, 27 Dec 2024 21:47:08 +0530
Subject: [PATCH 2/6] Remove duplicate definition

---
 libc/src/stdio/scanf_core/vfscanf_internal.h | 29 --------------------
 1 file changed, 29 deletions(-)

diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index c96c672bc6fac5..84d074711b8fb6 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -38,14 +38,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
 
 LIBC_INLINE void funlockfile(::FILE *) { return; }
 
-LIBC_INLINE int getc(void *f) {
-  return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
 LIBC_INLINE int ferror_unlocked(::FILE *f) { return LIBC_NAMESPACE::ferror(f); }
 
 #elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
@@ -58,21 +50,6 @@ LIBC_INLINE void funlockfile(FILE *f) {
   reinterpret_cast<LIBC_NAMESPACE::File *>(f)->unlock();
 }
 
-LIBC_INLINE int getc(void *f) {
-  unsigned char c;
-  auto result =
-      reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
-  size_t r = result.value;
-  if (result.has_error() || r != 1)
-    return '\0';
-
-  return c;
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
-}
-
 LIBC_INLINE int ferror_unlocked(FILE *f) {
   return reinterpret_cast<LIBC_NAMESPACE::File *>(f)->error_unlocked();
 }
@@ -85,12 +62,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; }
 
 LIBC_INLINE void funlockfile(::FILE *) { return; }
 
-LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
-
-LIBC_INLINE void ungetc(int c, void *f) {
-  ::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
 LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror(f); }
 
 #endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE

>From 6236e59ed06eb4e333533a092a423372e9f8d6e1 Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Fri, 27 Dec 2024 21:52:04 +0530
Subject: [PATCH 3/6] Doesn't build but should in CI

---
 libc/src/stdio/scanf_core/reader.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index cc09f47709d6b8..ccfcd220e19c21 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -6,11 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/types/FILE.h"
 #include "src/stdio/scanf_core/reader.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/File/file.h"
+
 #include <stddef.h>
 
-#include "src/__support/File/file.h"
+
+
 
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {

>From f3b8d7f06befde1a7a3f6aa7e7e506a40334bcfb Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Fri, 27 Dec 2024 22:05:53 +0530
Subject: [PATCH 4/6] clang-format patch

---
 libc/src/stdio/scanf_core/reader.cpp | 9 +++------
 libc/src/stdio/scanf_core/reader.h   | 7 +++----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index ccfcd220e19c21..e2dcdd5e1e05a6 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -6,16 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "hdr/types/FILE.h"
 #include "src/stdio/scanf_core/reader.h"
-#include "src/__support/macros/config.h"
+#include "hdr/types/FILE.h"
 #include "src/__support/File/file.h"
+#include "src/__support/macros/config.h"
 
 #include <stddef.h>
 
-
-
-
 namespace LIBC_NAMESPACE_DECL {
 namespace scanf_core {
 
@@ -51,7 +48,7 @@ LIBC_INLINE void ungetc(int c, void *f) {
   reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
 }
 
-#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
+#else  // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
 
 // Since ungetc_unlocked isn't always available, we don't acquire the lock for
 // system files.
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index e29ea6ea7d8917..c8b113b6157956 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -33,14 +33,13 @@ class Reader {
   // TODO: Set buff_len with a proper constant
   LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
 
-  LIBC_INLINE Reader(void *stream,
-                     ReadBuffer *stream_buffer = nullptr)
-      : rb(stream_buffer), input_stream(stream){}
+  LIBC_INLINE Reader(void *stream, ReadBuffer *stream_buffer = nullptr)
+      : rb(stream_buffer), input_stream(stream) {}
 
   // This returns the next character from the input and advances it by one
   // character. When it hits the end of the string or file it returns '\0' to
   // signal to stop parsing.
-  char getc(); 
+  char getc();
 
   // This moves the input back by one character, placing c into the buffer if
   // this is a file reader, else c is ignored.

>From bfe6b86d3547c25346d5dd2faca1a2677489ca70 Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Fri, 27 Dec 2024 22:33:40 +0530
Subject: [PATCH 5/6] Put overlay include last

---
 libc/src/stdio/scanf_core/reader.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index e2dcdd5e1e05a6..680c3b768f5d9e 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -7,10 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/stdio/scanf_core/reader.h"
-#include "hdr/types/FILE.h"
 #include "src/__support/File/file.h"
 #include "src/__support/macros/config.h"
 
+#include "hdr/types/FILE.h"
+
 #include <stddef.h>
 
 namespace LIBC_NAMESPACE_DECL {

>From 1f9e27cfc8596057212a67713045f678f61cada6 Mon Sep 17 00:00:00 2001
From: Vinay Deshmukh <32487576+vinay-deshmukh at users.noreply.github.com>
Date: Sat, 28 Dec 2024 20:11:20 +0530
Subject: [PATCH 6/6] Attempt to fix linker error

From:
https://github.com/llvm/llvm-project/actions/runs/12518682283/job/34921546556?pr=121215
Error was:
```
/usr/bin/ld: libc/src/stdio/scanf_core/CMakeFiles/libc.src.stdio.scanf_core.reader.dir/./reader.cpp.o: in function `__llvm_libc_20_0_0_git::scanf_core::Reader::getc()':
reader.cpp:(.text._ZN22__llvm_libc_20_0_0_git10scanf_core6Reader4getcEv+0x38): undefined reference to `__llvm_libc_20_0_0_git::File::read_unlocked(void*, unsigned long)'
/usr/bin/ld: libc/src/stdio/scanf_core/CMakeFiles/libc.src.stdio.scanf_core.reader.dir/./reader.cpp.o: in function `__llvm_libc_20_0_0_git::scanf_core::Reader::ungetc(char)':
reader.cpp:(.text._ZN22__llvm_libc_20_0_0_git10scanf_core6Reader6ungetcEc+0x2c): undefined reference to `__llvm_libc_20_0_0_git::File::ungetc_unlocked(int)'
/usr/bin/ld: libc/test/src/stdio/libc.test.src.stdio.sscanf_test.__unit__.__build__: hidden symbol `_ZN22__llvm_libc_20_0_0_git4File13read_unlockedEPvm' isn't defined
/usr/bin/ld: final link failed: bad value
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
```
---
 libc/src/stdio/scanf_core/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index a8935d464417c2..8016ac4ec7c58f 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -62,6 +62,7 @@ add_object_library(
     reader.h
   DEPENDS
     libc.src.__support.macros.attributes
+    libc.hdr.types.FILE
 )
 
 add_object_library(



More information about the libc-commits mailing list