[libc-commits] [libc] [libc][NFC] Adjust use of off_t internally (PR #68269)

Mikhail R. Gadelha via libc-commits libc-commits at lists.llvm.org
Wed Oct 4 15:26:38 PDT 2023


https://github.com/mikhailramalho created https://github.com/llvm/llvm-project/pull/68269

This patch includes changes related to the use of off_t in libc,
targeted at 32-bit systems: in several places, the offset is used either
as a long or an off_t (64-bit signed int), but in 32-bit systems a long
type is only 32 bits long.

1. Fix a warning in mmap where a long offset is expected, but we were
   passing an off_t. A static_cast and a comment were added to explain
   that we know we are ignoring the upper 32-bit of the off_t in 32-bit
   systems.
2. The code in pread and pwrite was slightly improved to remove a
   #ifdef LIBC_TARGET_ARCH_IS_RISCV32; we are using an if constexpr now.
3. The Linux file operations were changed to use off_t instead of a long
   where applicable. No changes were made to the standard API, e.g.,
   ftell returns the offset as an int so we added a static_cast and a
   comment explaining that this will cause a loss of integer precision
   in 32-bit systems.

>From 60401c11851edca5df01b65f3e3ce825d368e338 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Wed, 4 Oct 2023 19:10:25 -0300
Subject: [PATCH 1/2] [libc][NFC] Adjust use of off_t internally

This patch includes changes related to the use of off_t in libc,
targeted at 32-bit systems: in several places the offset is used either
as a long or an off_t (64-bit signed int), but in 32-bit systems a long
type is only 32-bits long.

1. Fix a warning in mmap where a long offset is expected, but we were
   passing a off_t. A static_cast and a comment were added to explain
   that we know we are ignoring the upper 32-bit of the off_t in 32-bit
   systems.
2. The code in pread and pwrite were slightly improved to remove a
   #ifdef LIBC_TARGET_ARCH_IS_RISCV32; we are using an if constexpr now.
3. The linux file operations were changed to use off_t instead of a long
   where applicable. No changes were made to the standard API, e.g.,
   ftell returns the offset as an int so we added a static_cast and a
   comment explaining that this will cause a loss of integer precision
   in 32-bit systems.
---
 libc/src/__support/File/file.cpp           | 14 ++++++--------
 libc/src/__support/File/file.h             |  7 ++++---
 libc/src/__support/File/linux/file.cpp     |  2 +-
 libc/src/__support/File/linux/file.h       |  2 +-
 libc/src/stdio/fopencookie.cpp             |  7 +++----
 libc/src/stdio/generic/ftell.cpp           |  6 +++++-
 libc/src/sys/mman/linux/mmap.cpp           |  5 ++++-
 libc/src/unistd/linux/pread.cpp            | 21 ++++++++++++---------
 libc/src/unistd/linux/pwrite.cpp           | 22 +++++++++++++---------
 libc/test/src/__support/File/file_test.cpp |  8 ++++----
 10 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 58097d017a23f3b..d42dd5736079a1f 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -282,7 +282,7 @@ int File::ungetc_unlocked(int c) {
   return c;
 }
 
-ErrorOr<int> File::seek(long offset, int whence) {
+ErrorOr<int> File::seek(off_t offset, int whence) {
   FileLock lock(this);
   if (prev_op == FileOp::WRITE && pos > 0) {
 
@@ -305,23 +305,21 @@ ErrorOr<int> File::seek(long offset, int whence) {
   auto result = platform_seek(this, offset, whence);
   if (!result.has_value())
     return Error(result.error());
-  else
-    return 0;
+  return 0;
 }
 
-ErrorOr<long> File::tell() {
+ErrorOr<off_t> File::tell() {
   FileLock lock(this);
   auto seek_target = eof ? SEEK_END : SEEK_CUR;
   auto result = platform_seek(this, 0, seek_target);
   if (!result.has_value() || result.value() < 0)
     return Error(result.error());
-  long platform_offset = result.value();
+  off_t platform_offset = result.value();
   if (prev_op == FileOp::READ)
     return platform_offset - (read_limit - pos);
-  else if (prev_op == FileOp::WRITE)
+  if (prev_op == FileOp::WRITE)
     return platform_offset + pos;
-  else
-    return platform_offset;
+  return platform_offset;
 }
 
 int File::flush_unlocked() {
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 2ea3843749ffe43..97769de077e8818 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -16,6 +16,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <unistd.h> // For off_t.
 
 namespace LIBC_NAMESPACE {
 
@@ -45,7 +46,7 @@ class File {
   using ReadFunc = FileIOResult(File *, void *, size_t);
   // The SeekFunc is expected to return the current offset of the external
   // file position indicator.
-  using SeekFunc = ErrorOr<long>(File *, long, int);
+  using SeekFunc = ErrorOr<off_t>(File *, off_t, int);
   using CloseFunc = int(File *);
 
   using ModeFlags = uint32_t;
@@ -179,9 +180,9 @@ class File {
     return read_unlocked(data, len);
   }
 
-  ErrorOr<int> seek(long offset, int whence);
+  ErrorOr<int> seek(off_t offset, int whence);
 
-  ErrorOr<long> tell();
+  ErrorOr<off_t> tell();
 
   // If buffer has data written to it, flush it out. Does nothing if the
   // buffer is currently being used as a read buffer.
diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp
index 2d4cea5b53c5815..cfc7e42d0626b14 100644
--- a/libc/src/__support/File/linux/file.cpp
+++ b/libc/src/__support/File/linux/file.cpp
@@ -41,7 +41,7 @@ FileIOResult linux_file_read(File *f, void *buf, size_t size) {
   return ret;
 }
 
-ErrorOr<long> linux_file_seek(File *f, long offset, int whence) {
+ErrorOr<off_t> linux_file_seek(File *f, off_t offset, int whence) {
   auto *lf = reinterpret_cast<LinuxFile *>(f);
   auto result = internal::lseekimpl(lf->get_fd(), offset, whence);
   if (!result.has_value())
diff --git a/libc/src/__support/File/linux/file.h b/libc/src/__support/File/linux/file.h
index 24e71b133c48130..a5b01304c114535 100644
--- a/libc/src/__support/File/linux/file.h
+++ b/libc/src/__support/File/linux/file.h
@@ -12,7 +12,7 @@ namespace LIBC_NAMESPACE {
 
 FileIOResult linux_file_write(File *, const void *, size_t);
 FileIOResult linux_file_read(File *, void *, size_t);
-ErrorOr<long> linux_file_seek(File *, long, int);
+ErrorOr<off_t> linux_file_seek(File *, off_t, int);
 int linux_file_close(File *);
 
 class LinuxFile : public File {
diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp
index 2cb7ad2f46ebf6c..17b8ae199d3db39 100644
--- a/libc/src/stdio/fopencookie.cpp
+++ b/libc/src/stdio/fopencookie.cpp
@@ -24,7 +24,7 @@ class CookieFile : public LIBC_NAMESPACE::File {
 
   static FileIOResult cookie_write(File *f, const void *data, size_t size);
   static FileIOResult cookie_read(File *f, void *data, size_t size);
-  static ErrorOr<long> cookie_seek(File *f, long offset, int whence);
+  static ErrorOr<off_t> cookie_seek(File *f, off_t offset, int whence);
   static int cookie_close(File *f);
 
 public:
@@ -52,7 +52,7 @@ FileIOResult CookieFile::cookie_read(File *f, void *data, size_t size) {
                                reinterpret_cast<char *>(data), size);
 }
 
-ErrorOr<long> CookieFile::cookie_seek(File *f, long offset, int whence) {
+ErrorOr<off_t> CookieFile::cookie_seek(File *f, off_t offset, int whence) {
   auto cookie_file = reinterpret_cast<CookieFile *>(f);
   if (cookie_file->ops.seek == nullptr) {
     return Error(EINVAL);
@@ -61,8 +61,7 @@ ErrorOr<long> CookieFile::cookie_seek(File *f, long offset, int whence) {
   int result = cookie_file->ops.seek(cookie_file->cookie, &offset64, whence);
   if (result == 0)
     return offset64;
-  else
-    return -1;
+  return -1;
 }
 
 int CookieFile::cookie_close(File *f) {
diff --git a/libc/src/stdio/generic/ftell.cpp b/libc/src/stdio/generic/ftell.cpp
index 5f7803150534b31..14be2d6e0c34aae 100644
--- a/libc/src/stdio/generic/ftell.cpp
+++ b/libc/src/stdio/generic/ftell.cpp
@@ -20,7 +20,11 @@ LLVM_LIBC_FUNCTION(long, ftell, (::FILE * stream)) {
     libc_errno = result.error();
     return -1;
   }
-  return result.value();
+  // tell() returns an off_t (64-bit signed integer), but this function returns
+  // a long (32-bit signed integer in 32-bit systems). We add a cast here to
+  // silence a "implicit conversion loses integer precision" warning when
+  // compiling for 32-bit systems.
+  return static_cast<long>(result.value());
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/sys/mman/linux/mmap.cpp b/libc/src/sys/mman/linux/mmap.cpp
index 16111c66859f5e7..2aa7003f342a967 100644
--- a/libc/src/sys/mman/linux/mmap.cpp
+++ b/libc/src/sys/mman/linux/mmap.cpp
@@ -39,9 +39,12 @@ LLVM_LIBC_FUNCTION(void *, mmap,
 #error "mmap or mmap2 syscalls not available."
 #endif
 
+  // We add an explicit cast to silence a "implicit conversion loses integer
+  // precision" warning when compiling for 32-bit systems.
+  long mmap_offset = static_cast<long>(offset);
   long ret =
       LIBC_NAMESPACE::syscall_impl(syscall_number, reinterpret_cast<long>(addr),
-                                   size, prot, flags, fd, offset);
+                                   size, prot, flags, fd, mmap_offset);
 
   // The mmap/mmap2 syscalls return negative values on error. These negative
   // values are actually the negative values of the error codes. So, fix them
diff --git a/libc/src/unistd/linux/pread.cpp b/libc/src/unistd/linux/pread.cpp
index 614de9732c6271a..d4085f79c4243b2 100644
--- a/libc/src/unistd/linux/pread.cpp
+++ b/libc/src/unistd/linux/pread.cpp
@@ -19,15 +19,18 @@ namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(ssize_t, pread,
                    (int fd, void *buf, size_t count, off_t offset)) {
-#ifdef LIBC_TARGET_ARCH_IS_RISCV32
-  static_assert(sizeof(off_t) == 8);
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_pread64, fd, buf, count, (long)offset,
-      (long)(((uint64_t)(offset)) >> 32));
-#else
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf,
-                                                      count, offset);
-#endif
+  ssize_t ret;
+  if constexpr (sizeof(off_t) == 8 && sizeof(size_t) == 4) {
+    // This is a 32-bit system with a 64-bit offset.
+    long offset_low = static_cast<long>(offset);
+    long offset_high = static_cast<long>(offset >> 32);
+    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
+        SYS_pread64, fd, buf, count, offset_low, offset_high);
+  } else {
+    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf,
+                                                        count, offset);
+  }
+
   if (ret < 0) {
     libc_errno = static_cast<int>(-ret);
     return -1;
diff --git a/libc/src/unistd/linux/pwrite.cpp b/libc/src/unistd/linux/pwrite.cpp
index 6c6a0b555ac1335..815bdad5910a44d 100644
--- a/libc/src/unistd/linux/pwrite.cpp
+++ b/libc/src/unistd/linux/pwrite.cpp
@@ -19,15 +19,19 @@ namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(ssize_t, pwrite,
                    (int fd, const void *buf, size_t count, off_t offset)) {
-#ifdef LIBC_TARGET_ARCH_IS_RISCV32
-  static_assert(sizeof(off_t) == 8);
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_pwrite64, fd, buf, count, (long)offset,
-      (long)(((uint64_t)(offset)) >> 32));
-#else
-  ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf,
-                                                      count, offset);
-#endif
+
+  ssize_t ret;
+  if constexpr (sizeof(off_t) == 8 && sizeof(size_t) == 4) {
+    // This is a 32-bit system with a 64-bit offset.
+    long offset_low = static_cast<long>(offset);
+    long offset_high = static_cast<long>(offset >> 32);
+    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
+        SYS_pwrite64, fd, buf, count, offset_low, offset_high);
+  } else {
+    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf,
+                                                        count, offset);
+  }
+
   if (ret < 0) {
     libc_errno = static_cast<int>(-ret);
     return -1;
diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp
index fbcedc163de10a8..2f68c3faa0ad087 100644
--- a/libc/test/src/__support/File/file_test.cpp
+++ b/libc/test/src/__support/File/file_test.cpp
@@ -31,8 +31,8 @@ class StringFile : public File {
   static FileIOResult str_read(LIBC_NAMESPACE::File *f, void *data, size_t len);
   static FileIOResult str_write(LIBC_NAMESPACE::File *f, const void *data,
                                 size_t len);
-  static ErrorOr<long> str_seek(LIBC_NAMESPACE::File *f, long offset,
-                                int whence);
+  static ErrorOr<off_t> str_seek(LIBC_NAMESPACE::File *f, off_t offset,
+                                 int whence);
   static int str_close(LIBC_NAMESPACE::File *f) {
     delete reinterpret_cast<StringFile *>(f);
     return 0;
@@ -93,8 +93,8 @@ FileIOResult StringFile::str_write(LIBC_NAMESPACE::File *f, const void *data,
   return i;
 }
 
-ErrorOr<long> StringFile::str_seek(LIBC_NAMESPACE::File *f, long offset,
-                                   int whence) {
+ErrorOr<off_t> StringFile::str_seek(LIBC_NAMESPACE::File *f, off_t offset,
+                                    int whence) {
   StringFile *sf = static_cast<StringFile *>(f);
   if (whence == SEEK_SET)
     sf->pos = offset;

>From fa414cb74721d63a2e3f6f7e39052207623bb08a Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Wed, 4 Oct 2023 19:25:01 -0300
Subject: [PATCH 2/2] Fix not using the return variable

Signed-off-by: Mikhail R. Gadelha <mikhail at igalia.com>
---
 libc/src/unistd/linux/pread.cpp  | 8 ++++----
 libc/src/unistd/linux/pwrite.cpp | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libc/src/unistd/linux/pread.cpp b/libc/src/unistd/linux/pread.cpp
index d4085f79c4243b2..690b7133488e247 100644
--- a/libc/src/unistd/linux/pread.cpp
+++ b/libc/src/unistd/linux/pread.cpp
@@ -24,11 +24,11 @@ LLVM_LIBC_FUNCTION(ssize_t, pread,
     // This is a 32-bit system with a 64-bit offset.
     long offset_low = static_cast<long>(offset);
     long offset_high = static_cast<long>(offset >> 32);
-    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-        SYS_pread64, fd, buf, count, offset_low, offset_high);
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf, count,
+                                                offset_low, offset_high);
   } else {
-    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf,
-                                                        count, offset);
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pread64, fd, buf, count,
+                                                offset);
   }
 
   if (ret < 0) {
diff --git a/libc/src/unistd/linux/pwrite.cpp b/libc/src/unistd/linux/pwrite.cpp
index 815bdad5910a44d..1053bcc384dd94c 100644
--- a/libc/src/unistd/linux/pwrite.cpp
+++ b/libc/src/unistd/linux/pwrite.cpp
@@ -25,11 +25,11 @@ LLVM_LIBC_FUNCTION(ssize_t, pwrite,
     // This is a 32-bit system with a 64-bit offset.
     long offset_low = static_cast<long>(offset);
     long offset_high = static_cast<long>(offset >> 32);
-    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-        SYS_pwrite64, fd, buf, count, offset_low, offset_high);
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf, count,
+                                                offset_low, offset_high);
   } else {
-    ssize_t ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf,
-                                                        count, offset);
+    ret = LIBC_NAMESPACE::syscall_impl<ssize_t>(SYS_pwrite64, fd, buf, count,
+                                                offset);
   }
 
   if (ret < 0) {



More information about the libc-commits mailing list