[Openmp-commits] [openmp] [libc] Change the `puts` implementation on the GPU (PR #67189)
via Openmp-commits
openmp-commits at lists.llvm.org
Fri Sep 22 13:01:38 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
<details>
<summary>Changes</summary>
Summary:
Normally, the implementation of `puts` simply writes a second newline
charcter after printing the first string. However, because the GPU does
everything in batches of the SIMT group size, this will end up with very
poor output where you get the strings printed and then 1-64 newline
characters all in a row. Optimizations like to turn `printf` calls into
`puts` so it's a good idea to make this produce the expected output.
The least invasive way I could do this was to add a new opcode. It's a
little bloated, but it avoids an unneccessary and slow send operation to
configure this.
---
Full diff: https://github.com/llvm/llvm-project/pull/67189.diff
4 Files Affected:
- (modified) libc/include/llvm-libc-types/rpc_opcodes_t.h (+11-10)
- (modified) libc/src/stdio/gpu/puts.cpp (+3-5)
- (modified) libc/utils/gpu/server/rpc_server.cpp (+13-9)
- (modified) openmp/libomptarget/test/libc/puts.c (+1-1)
``````````diff
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 9895269767d0037..fb0f19cf505e8dc 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -15,16 +15,17 @@ typedef enum : unsigned short {
RPC_WRITE_TO_STDOUT = 2,
RPC_WRITE_TO_STDERR = 3,
RPC_WRITE_TO_STREAM = 4,
- RPC_READ_FROM_STREAM = 5,
- RPC_OPEN_FILE = 6,
- RPC_CLOSE_FILE = 7,
- RPC_MALLOC = 8,
- RPC_FREE = 9,
- RPC_HOST_CALL = 10,
- RPC_ABORT = 11,
- RPC_FEOF = 12,
- RPC_FERROR = 13,
- RPC_CLEARERR = 14,
+ RPC_WRITE_TO_STDOUT_NEWLINE = 5,
+ RPC_READ_FROM_STREAM = 6,
+ RPC_OPEN_FILE = 7,
+ RPC_CLOSE_FILE = 8,
+ RPC_MALLOC = 9,
+ RPC_FREE = 10,
+ RPC_HOST_CALL = 11,
+ RPC_ABORT = 12,
+ RPC_FEOF = 13,
+ RPC_FERROR = 14,
+ RPC_CLEARERR = 15,
} rpc_opcode_t;
#endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__
diff --git a/libc/src/stdio/gpu/puts.cpp b/libc/src/stdio/gpu/puts.cpp
index 58a3534c57ef99f..e50e2cc7d55d506 100644
--- a/libc/src/stdio/gpu/puts.cpp
+++ b/libc/src/stdio/gpu/puts.cpp
@@ -17,11 +17,9 @@ namespace __llvm_libc {
LLVM_LIBC_FUNCTION(int, puts, (const char *__restrict str)) {
cpp::string_view str_view(str);
- auto written = file::write(stdout, str, str_view.size());
- if (written != str_view.size())
- return EOF;
- written = file::write(stdout, "\n", 1);
- if (written != 1)
+ auto written = file::write_impl<RPC_WRITE_TO_STDOUT_NEWLINE>(stdout, str,
+ str_view.size());
+ if (written != str_view.size() + 1)
return EOF;
return 0;
}
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 7493ed66ceecb8c..a772cd1d22e5073 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -59,23 +59,27 @@ struct Server {
switch (port->get_opcode()) {
case RPC_WRITE_TO_STREAM:
case RPC_WRITE_TO_STDERR:
- case RPC_WRITE_TO_STDOUT: {
+ case RPC_WRITE_TO_STDOUT:
+ case RPC_WRITE_TO_STDOUT_NEWLINE: {
uint64_t sizes[lane_size] = {0};
void *strs[lane_size] = {nullptr};
FILE *files[lane_size] = {nullptr};
- if (port->get_opcode() == RPC_WRITE_TO_STREAM)
+ if (port->get_opcode() == RPC_WRITE_TO_STREAM) {
port->recv([&](rpc::Buffer *buffer, uint32_t id) {
files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
});
+ } else if (port->get_opcode() == RPC_WRITE_TO_STDERR) {
+ std::fill(files, files + lane_size, stderr);
+ } else {
+ std::fill(files, files + lane_size, stdout);
+ }
+
port->recv_n(strs, sizes, [&](uint64_t size) { return new char[size]; });
port->send([&](rpc::Buffer *buffer, uint32_t id) {
- FILE *file =
- port->get_opcode() == RPC_WRITE_TO_STDOUT
- ? stdout
- : (port->get_opcode() == RPC_WRITE_TO_STDERR ? stderr
- : files[id]);
- uint64_t ret = fwrite(strs[id], 1, sizes[id], file);
- std::memcpy(buffer->data, &ret, sizeof(uint64_t));
+ buffer->data[0] = fwrite(strs[id], 1, sizes[id], files[id]);
+ if (port->get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE &&
+ buffer->data[0] == sizes[id])
+ buffer->data[0] += fwrite("\n", 1, 1, files[id]);
delete[] reinterpret_cast<uint8_t *>(strs[id]);
});
break;
diff --git a/openmp/libomptarget/test/libc/puts.c b/openmp/libomptarget/test/libc/puts.c
index 18d87ed1b36ae65..0e363f55296184b 100644
--- a/openmp/libomptarget/test/libc/puts.c
+++ b/openmp/libomptarget/test/libc/puts.c
@@ -31,5 +31,5 @@ int main() {
// CHECK: PASS
#pragma omp target teams num_teams(4)
#pragma omp parallel num_threads(2)
- { fputs("PASS\n", stdout); }
+ { puts("PASS\n"); }
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/67189
More information about the Openmp-commits
mailing list