[libc-commits] [libc] [libc] Rework the 'fgets' implementation on the GPU (PR #69635)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Thu Oct 19 12:47:22 PDT 2023


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/69635

Summary:
The `fgets` function as implemented is not functional currently when
called with multiple threads. This is because we rely on reapeatedly
polling the character to detect EOF. This doesn't work when there are
multiple threads that may with to poll the characters. this patch pulls
out the logic into a standalone RPC call to handle this in a single
operation such that calling it from multiple threads functions as
expected. It also makes it less slow because we no longer make N RPC
calls for N characters.


>From 344c5b067e0fb25ac31ba9167b6988d210865f92 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Thu, 19 Oct 2023 14:16:36 -0500
Subject: [PATCH] [libc] Rework the 'fgets' implementation on the GPU

Summary:
The `fgets` function as implemented is not functional currently when
called with multiple threads. This is because we rely on reapeatedly
polling the character to detect EOF. This doesn't work when there are
multiple threads that may with to poll the characters. this patch pulls
out the logic into a standalone RPC call to handle this in a single
operation such that calling it from multiple threads functions as
expected. It also makes it less slow because we no longer make N RPC
calls for N characters.
---
 libc/include/llvm-libc-types/rpc_opcodes_t.h |  1 +
 libc/src/stdio/gpu/fgets.cpp                 | 28 +++++++++-----------
 libc/utils/gpu/server/rpc_server.cpp         | 16 +++++++++++
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 2fd318f06a7db16..7b85428dd3445e8 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -17,6 +17,7 @@ typedef enum {
   RPC_WRITE_TO_STREAM,
   RPC_WRITE_TO_STDOUT_NEWLINE,
   RPC_READ_FROM_STREAM,
+  RPC_READ_FGETS,
   RPC_OPEN_FILE,
   RPC_CLOSE_FILE,
   RPC_MALLOC,
diff --git a/libc/src/stdio/gpu/fgets.cpp b/libc/src/stdio/gpu/fgets.cpp
index 4cabd15001a7f54..5ea4bdcdc9e0fef 100644
--- a/libc/src/stdio/gpu/fgets.cpp
+++ b/libc/src/stdio/gpu/fgets.cpp
@@ -22,24 +22,20 @@ LLVM_LIBC_FUNCTION(char *, fgets,
   if (count < 1)
     return nullptr;
 
-  // This implementation is very slow as it makes multiple RPC calls.
-  unsigned char c = '\0';
-  int i = 0;
-  for (; i < count - 1 && c != '\n'; ++i) {
-    auto r = file::read(stream, &c, 1);
-    if (r != 1)
-      break;
-
-    str[i] = c;
-  }
-
-  bool has_error = LIBC_NAMESPACE::ferror(stream);
-  bool has_eof = LIBC_NAMESPACE::feof(stream);
-
-  if (has_error || (i == 0 && has_eof))
+  uint64_t recv_size;
+  void *buf = nullptr;
+  rpc::Client::Port port = rpc::client.open<RPC_READ_FGETS>();
+  port.send([=](rpc::Buffer *buffer) {
+    buffer->data[0] = count;
+    buffer->data[1] = file::from_stream(stream);
+  });
+  port.recv_n(&buf, &recv_size,
+              [&](uint64_t) { return reinterpret_cast<void *>(str); });
+  port.close();
+
+  if (recv_size == 0)
     return nullptr;
 
-  str[i] = '\0';
   return str;
 }
 
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 0550115f7cd1a1c..05e900edc6993bb 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -101,6 +101,22 @@ struct Server {
       });
       break;
     }
+    case RPC_READ_FGETS: {
+      uint64_t sizes[lane_size] = {0};
+      void *data[lane_size] = {nullptr};
+      port->recv([&](rpc::Buffer *buffer, uint32_t id) {
+        data[id] = new char[buffer->data[0]];
+        const char *str =
+            fgets(reinterpret_cast<char *>(data[id]), buffer->data[0],
+                  file::to_stream(buffer->data[1]));
+        sizes[id] = !str ? 0 : std::strlen(str) + 1;
+      });
+      port->send_n(data, sizes);
+      for (uint32_t id = 0; id < lane_size; ++id)
+        if (data[id])
+          delete[] reinterpret_cast<uint8_t *>(data[id]);
+      break;
+    }
     case RPC_OPEN_FILE: {
       uint64_t sizes[lane_size] = {0};
       void *paths[lane_size] = {nullptr};



More information about the libc-commits mailing list