[Openmp-commits] [openmp] [libc] Change the `puts` implementation on the GPU (PR #67189)

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Fri Sep 22 13:00:39 PDT 2023


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/67189

Summary:
Normally, the implementation of `puts` simply writes a second newline
charcter after printing the first string. However, because the GPU does
everything in batches of the SIMT group size, this will end up with very
poor output where you get the strings printed and then 1-64 newline
characters all in a row. Optimizations like to turn `printf` calls into
`puts` so it's a good idea to make this produce the expected output.

The least invasive way I could do this was to add a new opcode. It's a
little bloated, but it avoids an unneccessary and slow send operation to
configure this.


>From b40b882c87d367d567cfb905df881b2681787b80 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Fri, 22 Sep 2023 14:56:39 -0500
Subject: [PATCH] [libc] Change the `puts` implementation on the GPU

Summary:
Normally, the implementation of `puts` simply writes a second newline
charcter after printing the first string. However, because the GPU does
everything in batches of the SIMT group size, this will end up with very
poor output where you get the strings printed and then 1-64 newline
characters all in a row. Optimizations like to turn `printf` calls into
`puts` so it's a good idea to make this produce the expected output.

The least invasive way I could do this was to add a new opcode. It's a
little bloated, but it avoids an unneccessary and slow send operation to
configure this.
---
 libc/include/llvm-libc-types/rpc_opcodes_t.h | 21 ++++++++++---------
 libc/src/stdio/gpu/puts.cpp                  |  8 +++----
 libc/utils/gpu/server/rpc_server.cpp         | 22 ++++++++++++--------
 openmp/libomptarget/test/libc/puts.c         |  2 +-
 4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 9895269767d0037..fb0f19cf505e8dc 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -15,16 +15,17 @@ typedef enum : unsigned short {
   RPC_WRITE_TO_STDOUT = 2,
   RPC_WRITE_TO_STDERR = 3,
   RPC_WRITE_TO_STREAM = 4,
-  RPC_READ_FROM_STREAM = 5,
-  RPC_OPEN_FILE = 6,
-  RPC_CLOSE_FILE = 7,
-  RPC_MALLOC = 8,
-  RPC_FREE = 9,
-  RPC_HOST_CALL = 10,
-  RPC_ABORT = 11,
-  RPC_FEOF = 12,
-  RPC_FERROR = 13,
-  RPC_CLEARERR = 14,
+  RPC_WRITE_TO_STDOUT_NEWLINE = 5,
+  RPC_READ_FROM_STREAM = 6,
+  RPC_OPEN_FILE = 7,
+  RPC_CLOSE_FILE = 8,
+  RPC_MALLOC = 9,
+  RPC_FREE = 10,
+  RPC_HOST_CALL = 11,
+  RPC_ABORT = 12,
+  RPC_FEOF = 13,
+  RPC_FERROR = 14,
+  RPC_CLEARERR = 15,
 } rpc_opcode_t;
 
 #endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__
diff --git a/libc/src/stdio/gpu/puts.cpp b/libc/src/stdio/gpu/puts.cpp
index 58a3534c57ef99f..e50e2cc7d55d506 100644
--- a/libc/src/stdio/gpu/puts.cpp
+++ b/libc/src/stdio/gpu/puts.cpp
@@ -17,11 +17,9 @@ namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(int, puts, (const char *__restrict str)) {
   cpp::string_view str_view(str);
-  auto written = file::write(stdout, str, str_view.size());
-  if (written != str_view.size())
-    return EOF;
-  written = file::write(stdout, "\n", 1);
-  if (written != 1)
+  auto written = file::write_impl<RPC_WRITE_TO_STDOUT_NEWLINE>(stdout, str,
+                                                               str_view.size());
+  if (written != str_view.size() + 1)
     return EOF;
   return 0;
 }
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 7493ed66ceecb8c..a772cd1d22e5073 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -59,23 +59,27 @@ struct Server {
     switch (port->get_opcode()) {
     case RPC_WRITE_TO_STREAM:
     case RPC_WRITE_TO_STDERR:
-    case RPC_WRITE_TO_STDOUT: {
+    case RPC_WRITE_TO_STDOUT:
+    case RPC_WRITE_TO_STDOUT_NEWLINE: {
       uint64_t sizes[lane_size] = {0};
       void *strs[lane_size] = {nullptr};
       FILE *files[lane_size] = {nullptr};
-      if (port->get_opcode() == RPC_WRITE_TO_STREAM)
+      if (port->get_opcode() == RPC_WRITE_TO_STREAM) {
         port->recv([&](rpc::Buffer *buffer, uint32_t id) {
           files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
         });
+      } else if (port->get_opcode() == RPC_WRITE_TO_STDERR) {
+        std::fill(files, files + lane_size, stderr);
+      } else {
+        std::fill(files, files + lane_size, stdout);
+      }
+
       port->recv_n(strs, sizes, [&](uint64_t size) { return new char[size]; });
       port->send([&](rpc::Buffer *buffer, uint32_t id) {
-        FILE *file =
-            port->get_opcode() == RPC_WRITE_TO_STDOUT
-                ? stdout
-                : (port->get_opcode() == RPC_WRITE_TO_STDERR ? stderr
-                                                             : files[id]);
-        uint64_t ret = fwrite(strs[id], 1, sizes[id], file);
-        std::memcpy(buffer->data, &ret, sizeof(uint64_t));
+        buffer->data[0] = fwrite(strs[id], 1, sizes[id], files[id]);
+        if (port->get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE &&
+            buffer->data[0] == sizes[id])
+          buffer->data[0] += fwrite("\n", 1, 1, files[id]);
         delete[] reinterpret_cast<uint8_t *>(strs[id]);
       });
       break;
diff --git a/openmp/libomptarget/test/libc/puts.c b/openmp/libomptarget/test/libc/puts.c
index 18d87ed1b36ae65..0e363f55296184b 100644
--- a/openmp/libomptarget/test/libc/puts.c
+++ b/openmp/libomptarget/test/libc/puts.c
@@ -31,5 +31,5 @@ int main() {
 // CHECK: PASS
 #pragma omp target teams num_teams(4)
 #pragma omp parallel num_threads(2)
-  { fputs("PASS\n", stdout); }
+  { puts("PASS\n"); }
 }



More information about the Openmp-commits mailing list