[compiler-rt] [flang] [llvm] [llvm] fix mustache template whitespace (PR #153724)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 29 14:47:09 PDT 2025


https://github.com/mdenson updated https://github.com/llvm/llvm-project/pull/153724

>From 6f40b9f42eddfbb0c000514bc8e0ed711f037941 Mon Sep 17 00:00:00 2001
From: Brock Denson <brock.denson at virscient.com>
Date: Thu, 14 Aug 2025 20:19:37 -0500
Subject: [PATCH 1/6] [clang-doc] fix mustache template whitespace

---
 llvm/lib/Support/Mustache.cpp | 55 +++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 6c2ed6c84c6cf..205d9b51103c2 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -166,6 +166,10 @@ class ASTNode {
   void renderSectionLambdas(const llvm::json::Value &Contexts,
                             llvm::raw_ostream &OS, SectionLambda &L);
 
+  void indentTextNode(std::string &body, size_t Indentation, bool lastChild);
+
+  void indentNodes(ASTNode *Node, bool isPartial);
+
   void renderPartial(const llvm::json::Value &Contexts, llvm::raw_ostream &OS,
                      ASTNode *Partial);
 
@@ -681,10 +685,57 @@ void ASTNode::renderChild(const json::Value &Contexts, llvm::raw_ostream &OS) {
     Child->render(Contexts, OS);
 }
 
+void ASTNode::indentTextNode(std::string &body, size_t Indentation,
+                             bool lastChild) {
+  std::string spaces(Indentation, ' ');
+  size_t pos = 0;
+
+  if (lastChild)
+    body.erase(body.find_last_not_of(" \t\r\f\v") + 1); // .rtrim??
+
+  while ((pos = body.find('\n', pos)) != std::string::npos) {
+    if ((!lastChild) || (pos != body.size() - 1)) {
+      body.insert(pos + 1, spaces);
+      pos += 1 + Indentation;
+    } else {
+      break;
+    }
+  }
+}
+
+void ASTNode::indentNodes(ASTNode *Node, bool isPartial) {
+  size_t size = Node->Children.size();
+
+  for (size_t i = 0; i < size; ++i) {
+    ASTNode *child = Node->Children[i].get();
+    switch (child->Ty) {
+    case ASTNode::Text: {
+      indentTextNode(child->Body, Indentation, ((i == size - 1) && isPartial));
+      break;
+    }
+    case ASTNode::Section: {
+      indentNodes(child, false);
+      break;
+    }
+    case ASTNode::Partial: {
+      indentNodes(child, true);
+    }
+    case ASTNode::Root:
+    case ASTNode::Variable:
+    case ASTNode::UnescapeVariable:
+    case ASTNode::InvertSection:
+      break;
+    default:
+      llvm::outs() << "Invalid ASTNode type\n";
+      break;
+    }
+  }
+}
+
 void ASTNode::renderPartial(const json::Value &Contexts, llvm::raw_ostream &OS,
                             ASTNode *Partial) {
-  AddIndentationStringStream IS(OS, Indentation);
-  Partial->render(Contexts, IS);
+  indentNodes(Partial, true);
+  Partial->render(Contexts, OS);
 }
 
 void ASTNode::renderLambdas(const json::Value &Contexts, llvm::raw_ostream &OS,

>From 643b340c354c1966a86876378078eb4edd68b7f0 Mon Sep 17 00:00:00 2001
From: Brock Denson <brock.denson at virscient.com>
Date: Fri, 15 Aug 2025 14:15:16 -0500
Subject: [PATCH 2/6] stop trimming partial whitespace

---
 llvm/lib/Support/Mustache.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 205d9b51103c2..8e852cb6345f3 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -166,7 +166,7 @@ class ASTNode {
   void renderSectionLambdas(const llvm::json::Value &Contexts,
                             llvm::raw_ostream &OS, SectionLambda &L);
 
-  void indentTextNode(std::string &body, size_t Indentation, bool lastChild);
+  void indentTextNode(std::string &Body, size_t Indentation, bool FinalNode);
 
   void indentNodes(ASTNode *Node, bool isPartial);
 
@@ -685,17 +685,19 @@ void ASTNode::renderChild(const json::Value &Contexts, llvm::raw_ostream &OS) {
     Child->render(Contexts, OS);
 }
 
-void ASTNode::indentTextNode(std::string &body, size_t Indentation,
-                             bool lastChild) {
+void ASTNode::indentTextNode(std::string &Body, size_t Indentation,
+                             bool FinalNode) {
   std::string spaces(Indentation, ' ');
   size_t pos = 0;
+  size_t LastChar = std::string::npos;
 
-  if (lastChild)
-    body.erase(body.find_last_not_of(" \t\r\f\v") + 1); // .rtrim??
+  if (FinalNode)
+    // body.erase(body.find_last_not_of(" \t\r\f\v") + 1);
+    LastChar = Body.find_last_not_of(" \t\r\f\v");
 
-  while ((pos = body.find('\n', pos)) != std::string::npos) {
-    if ((!lastChild) || (pos != body.size() - 1)) {
-      body.insert(pos + 1, spaces);
+  while ((pos = Body.find('\n', pos)) != std::string::npos) {
+    if ((!FinalNode) || (pos != LastChar)) {
+      Body.insert(pos + 1, spaces);
       pos += 1 + Indentation;
     } else {
       break;

>From 403c6ac8e836babdd2cb9526986d563964435af7 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 29 Aug 2025 21:01:06 +0000
Subject: [PATCH 3/6] [fuzzer] Avoid buffer overflow in CrossOverTest.cpp (and
 fix-forward build breakage from #155943) (#156103)

ASan now detects dereferences of zero-sized allocations
(https://github.com/llvm/llvm-project/pull/155943; the corresponding
MSan change is https://github.com/llvm/llvm-project/pull/155944). This
appears to have detected a bug in CrossOverTest.cpp, causing a buildbot
breakage. This patch fixes the test.

Buildbot report: https://lab.llvm.org/buildbot/#/builders/4/builds/8732
```
            7: ==949882==ERROR: AddressSanitizer: heap-buffer-overflow on address 0xf169cfbe0010 at pc 0xb5f45efc6d1c bp 0xffffd933e460 sp 0xffffd933e458
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            8: READ of size 1 at 0xf169cfbe0010 thread T0
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            9:  #0 0xb5f45efc6d18 in LLVMFuzzerTestOneInput /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/fuzzer/CrossOverTest.cpp:48:7
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
check:20'1                                                                                                                                 ?                             possible intended match
           10:  #1 0xb5f45eec7288 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:619:13
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           11:  #2 0xb5f45eec85d4 in fuzzer::Fuzzer::ReadAndExecuteSeedCorpora(std::vector<fuzzer::SizedFile, std::allocator<fuzzer::SizedFile>>&) /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:812:3
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           12:  #3 0xb5f45eec8c60 in fuzzer::Fuzzer::Loop(std::vector<fuzzer::SizedFile, std::allocator<fuzzer::SizedFile>>&) /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:872:3
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           13:  #4 0xb5f45eeb5c64 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:923:6
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           14:  #5 0xb5f45eee09d0 in main /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
check:20'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```

For context, FuzzerLoop.cpp:812 tries empty input:
```
810  // Test the callback with empty input and never try it again.
811  uint8_t dummy = 0;
812  ExecuteCallback(&dummy, 0);
```
---
 compiler-rt/test/fuzzer/CrossOverTest.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/compiler-rt/test/fuzzer/CrossOverTest.cpp b/compiler-rt/test/fuzzer/CrossOverTest.cpp
index b4506f665dc76..6d764d0b6a6bd 100644
--- a/compiler-rt/test/fuzzer/CrossOverTest.cpp
+++ b/compiler-rt/test/fuzzer/CrossOverTest.cpp
@@ -45,6 +45,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   // fprintf(stderr, "ExpectedHash: %x\n", ExpectedHash);
   if (Size == 10 && ExpectedHash == simple_hash(Data, Size))
     *NullPtr = 0;
+  // It's UB to read *Data when Size == 0
+  if (Size == 0)
+    return;
   if (*Data == 'A')
     Sink++;
   if (*Data == 'Z')

>From eece926b3f46fc0efe3850756c1c22150ea2e2e2 Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Fri, 29 Aug 2025 23:04:48 +0200
Subject: [PATCH 4/6] [Flang][OpenMP][Runtime] Minor Flang runtime for OpenMP
 AMDGPU modifications (#152631)

We have some modifications downstream to compile the flang runtime for
amdgpu using clang OpenMP, some more hacky than others to work around
(hopefully temporary) compiler issues. The additions here are the
non-hacky alterations.

Main changes:
* Create freestanding versions of memcpy, strlen and memmove, and
  replace std:: references with these so that we can default to std::
  when it's available, or our own Flang implementation when it's not.
* Wrap more bits and pieces of the library in declare target wrappers
  (RT_* macros).
* Fix some warnings that'll pose issues with -Werror on, in this case
  having the namespace in front of variables passed to templates.

Another minor issue that'll likely still pop up, depending on the
program you're linking with, is that abort will be undefined. It is
perhaps possible to solve it with a freestanding implementation as with
memcpy etc., but we end up with multiple definitions in this case. An
alternative is to create an extern "C" version (which can be empty or
forward on to the builtin).

Co-author: Dan Palermo Dan.Palermo at amd.com
---
 flang-rt/include/flang-rt/runtime/buffer.h    |  4 +-
 .../include/flang-rt/runtime/descriptor.h     |  2 +
 .../flang-rt/runtime/format-implementation.h  |  2 +-
 flang-rt/include/flang-rt/runtime/tools.h     |  4 +-
 flang-rt/lib/runtime/array-constructor.cpp    |  2 +-
 flang-rt/lib/runtime/assign.cpp               |  4 +-
 flang-rt/lib/runtime/character.cpp            | 10 +--
 flang-rt/lib/runtime/command.cpp              |  2 +-
 flang-rt/lib/runtime/copy.cpp                 |  6 +-
 flang-rt/lib/runtime/derived.cpp              |  8 +-
 flang-rt/lib/runtime/descriptor-io.cpp        |  2 +-
 flang-rt/lib/runtime/descriptor.cpp           |  3 +-
 flang-rt/lib/runtime/edit-input.cpp           | 14 ++--
 flang-rt/lib/runtime/extensions.cpp           | 10 +--
 flang-rt/lib/runtime/external-unit.cpp        |  6 +-
 flang-rt/lib/runtime/extrema.cpp              |  6 +-
 flang-rt/lib/runtime/internal-unit.cpp        |  2 +-
 flang-rt/lib/runtime/io-error.cpp             |  2 +-
 flang-rt/lib/runtime/matmul-transpose.cpp     |  4 +-
 flang-rt/lib/runtime/matmul.cpp               |  6 +-
 flang-rt/lib/runtime/misc-intrinsic.cpp       |  4 +-
 flang-rt/lib/runtime/pseudo-unit.cpp          |  2 +-
 flang-rt/lib/runtime/ragged.cpp               |  2 +-
 flang-rt/lib/runtime/reduce.cpp               |  6 +-
 flang-rt/lib/runtime/stat.cpp                 |  6 +-
 flang-rt/lib/runtime/temporary-stack.cpp      | 10 ++-
 flang-rt/lib/runtime/time-intrinsic.cpp       | 15 ++--
 flang-rt/lib/runtime/tools.cpp                | 26 +++----
 flang-rt/lib/runtime/transformational.cpp     |  2 +-
 flang-rt/lib/runtime/unit-map.cpp             |  2 +-
 flang-rt/lib/runtime/unit.cpp                 | 14 ++--
 flang-rt/lib/runtime/work-queue.cpp           |  8 +-
 flang/include/flang/Common/Fortran-consts.h   |  3 +
 flang/include/flang/Common/constexpr-bitset.h |  3 +-
 .../flang/Decimal/binary-floating-point.h     |  5 +-
 .../flang/Runtime/allocator-registry-consts.h |  4 +
 .../flang/Runtime/freestanding-tools.h        | 75 ++++++++++++++++++-
 flang/include/flang/Runtime/stop.h            |  2 +
 38 files changed, 191 insertions(+), 97 deletions(-)

diff --git a/flang-rt/include/flang-rt/runtime/buffer.h b/flang-rt/include/flang-rt/runtime/buffer.h
index b5a9ce9e35e91..4339213488e22 100644
--- a/flang-rt/include/flang-rt/runtime/buffer.h
+++ b/flang-rt/include/flang-rt/runtime/buffer.h
@@ -158,8 +158,8 @@ template <typename STORE, std::size_t minBuffer = 65536> class FileFrame {
       // Avoid passing a null pointer, since it would result in an undefined
       // behavior.
       if (old != nullptr) {
-        std::memcpy(buffer_, old + start_, chunk);
-        std::memcpy(buffer_ + chunk, old, length_ - chunk);
+        runtime::memcpy(buffer_, old + start_, chunk);
+        runtime::memcpy(buffer_ + chunk, old, length_ - chunk);
         FreeMemory(old);
       }
       start_ = 0;
diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index e440690e40286..4c65abce782d3 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -32,8 +32,10 @@
 #include <cstdlib>
 #include <cstring>
 
+RT_OFFLOAD_VAR_GROUP_BEGIN
 /// Value used for asyncObject when no specific stream is specified.
 static constexpr std::int64_t *kNoAsyncObject = nullptr;
+RT_OFFLOAD_VAR_GROUP_END
 
 namespace Fortran::runtime {
 
diff --git a/flang-rt/include/flang-rt/runtime/format-implementation.h b/flang-rt/include/flang-rt/runtime/format-implementation.h
index 580e04f335aec..de06524de32d3 100644
--- a/flang-rt/include/flang-rt/runtime/format-implementation.h
+++ b/flang-rt/include/flang-rt/runtime/format-implementation.h
@@ -49,7 +49,7 @@ RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
       SubscriptValue at[maxRank];
       formatDescriptor->GetLowerBounds(at);
       for (std::size_t j{0}; j < elements; ++j) {
-        std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes);
+        runtime::memcpy(p, formatDescriptor->Element<char>(at), elementBytes);
         p += elementBytes;
         formatDescriptor->IncrementSubscripts(at);
       }
diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h
index f26923b140474..1939c4d907be4 100644
--- a/flang-rt/include/flang-rt/runtime/tools.h
+++ b/flang-rt/include/flang-rt/runtime/tools.h
@@ -560,9 +560,9 @@ RT_API_ATTRS void CopyAndPad(
       to[j] = static_cast<TO>(' ');
     }
   } else if (toChars <= fromChars) {
-    std::memcpy(to, from, toChars * sizeof(TO));
+    runtime::memcpy(to, from, toChars * sizeof(TO));
   } else {
-    std::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO));
+    runtime::memcpy(to, from, std::min(toChars, fromChars) * sizeof(TO));
     for (std::size_t j{fromChars}; j < toChars; ++j) {
       to[j] = static_cast<TO>(' ');
     }
diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp
index 858fac7bf2b39..9838c69ff1f9e 100644
--- a/flang-rt/lib/runtime/array-constructor.cpp
+++ b/flang-rt/lib/runtime/array-constructor.cpp
@@ -173,7 +173,7 @@ void RTDEF(PushArrayConstructorSimpleScalar)(
   AllocateOrReallocateVectorIfNeeded(vector, terminator, to.Elements(), 1);
   SubscriptValue subscript[1]{
       to.GetDimension(0).LowerBound() + vector.nextValuePosition};
-  std::memcpy(to.Element<char>(subscript), from, to.ElementBytes());
+  runtime::memcpy(to.Element<char>(subscript), from, to.ElementBytes());
   ++vector.nextValuePosition;
 }
 
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index a21e899c2c658..923b76adca726 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -288,7 +288,7 @@ RT_API_ATTRS int AssignTicket::Begin(WorkQueue &workQueue) {
     if (mustDeallocateLHS) {
       // Convert the LHS into a temporary, then make it look deallocated.
       toDeallocate_ = &tempDescriptor_.descriptor();
-      std::memcpy(
+      runtime::memcpy(
           reinterpret_cast<void *>(toDeallocate_), &to_, to_.SizeInBytes());
       to_.set_base_addr(nullptr);
       if (toDerived_ && (flags_ & NeedFinalization)) {
@@ -307,7 +307,7 @@ RT_API_ATTRS int AssignTicket::Begin(WorkQueue &workQueue) {
       auto descBytes{from_->SizeInBytes()};
       Descriptor &newFrom{tempDescriptor_.descriptor()};
       persist_ = true; // tempDescriptor_ state must outlive child tickets
-      std::memcpy(reinterpret_cast<void *>(&newFrom), from_, descBytes);
+      runtime::memcpy(reinterpret_cast<void *>(&newFrom), from_, descBytes);
       // Pretend the temporary descriptor is for an ALLOCATABLE
       // entity, otherwise, the Deallocate() below will not
       // free the descriptor memory.
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index f140d202e118e..98a225dbec9f9 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -616,8 +616,8 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
   from.GetLowerBounds(fromAt);
   for (; elements-- > 0;
        to += newBytes, p += oldBytes, from.IncrementSubscripts(fromAt)) {
-    std::memcpy(to, p, oldBytes);
-    std::memcpy(to + oldBytes, from.Element<char>(fromAt), fromBytes);
+    runtime::memcpy(to, p, oldBytes);
+    runtime::memcpy(to + oldBytes, from.Element<char>(fromAt), fromBytes);
   }
   FreeMemory(old);
 }
@@ -698,7 +698,7 @@ void RTDEF(CharacterCompare)(
 std::size_t RTDEF(CharacterAppend1)(char *lhs, std::size_t lhsBytes,
     std::size_t offset, const char *rhs, std::size_t rhsBytes) {
   if (auto n{std::min(lhsBytes - offset, rhsBytes)}) {
-    std::memcpy(lhs + offset, rhs, n);
+    runtime::memcpy(lhs + offset, rhs, n);
     offset += n;
   }
   return offset;
@@ -706,7 +706,7 @@ std::size_t RTDEF(CharacterAppend1)(char *lhs, std::size_t lhsBytes,
 
 void RTDEF(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) {
   if (bytes > offset) {
-    std::memset(lhs + offset, ' ', bytes - offset);
+    runtime::memset(lhs + offset, ' ', bytes - offset);
   }
 }
 
@@ -838,7 +838,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
   }
   const char *from{string.OffsetElement()};
   for (char *to{result.OffsetElement()}; ncopies-- > 0; to += origBytes) {
-    std::memcpy(to, from, origBytes);
+    runtime::memcpy(to, from, origBytes);
   }
 }
 
diff --git a/flang-rt/lib/runtime/command.cpp b/flang-rt/lib/runtime/command.cpp
index a4e8e31ad0274..6b5d7722d9ebf 100644
--- a/flang-rt/lib/runtime/command.cpp
+++ b/flang-rt/lib/runtime/command.cpp
@@ -58,7 +58,7 @@ static std::int64_t StringLength(const char *string) {
 
 static void FillWithSpaces(const Descriptor &value, std::size_t offset = 0) {
   if (offset < value.ElementBytes()) {
-    std::memset(
+    runtime::memset(
         value.OffsetElement(offset), ' ', value.ElementBytes() - offset);
   }
 }
diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp
index f990f46e0be66..1db8962dad0d3 100644
--- a/flang-rt/lib/runtime/copy.cpp
+++ b/flang-rt/lib/runtime/copy.cpp
@@ -12,6 +12,8 @@
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/type-info.h"
 #include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/freestanding-tools.h"
+
 #include <cstring>
 
 namespace Fortran::runtime {
@@ -101,7 +103,7 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
     char *toPtr{to.Element<char>(toAt)};
     char *fromPtr{from.Element<char>(fromAt)};
     RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes());
-    std::memcpy(toPtr, fromPtr, to.ElementBytes());
+    runtime::memcpy(toPtr, fromPtr, to.ElementBytes());
     return;
   }
 
@@ -148,7 +150,7 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
     // Moreover, if we came here from an Component::Genre::Data component,
     // all the per-element copies are redundant, because the parent
     // has already been copied as a whole.
-    std::memcpy(toPtr, fromPtr, curTo.ElementBytes());
+    runtime::memcpy(toPtr, fromPtr, curTo.ElementBytes());
     --elements;
     if (elements != 0) {
       currentCopy.IncrementSubscripts(terminator);
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index 2dddf079f91db..6abeb2edd1da7 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -71,7 +71,7 @@ RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
       // Explicit initialization of data pointers and
       // non-allocatable non-automatic components
       std::size_t bytes{component_->SizeInBytes(instance_)};
-      std::memcpy(rawComponent, init, bytes);
+      runtime::memcpy(rawComponent, init, bytes);
     } else if (component_->genre() == typeInfo::Component::Genre::Pointer) {
       // Data pointers without explicit initialization are established
       // so that they are valid right-hand side targets of pointer
@@ -108,20 +108,20 @@ RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
             chunk = done;
           }
           char *uninitialized{rawInstance + done * *stride};
-          std::memcpy(uninitialized, rawInstance, chunk * *stride);
+          runtime::memcpy(uninitialized, rawInstance, chunk * *stride);
           done += chunk;
         }
       } else {
         for (std::size_t done{1}; done < elements_; ++done) {
           char *uninitialized{rawInstance + done * *stride};
-          std::memcpy(uninitialized, rawInstance, elementBytes);
+          runtime::memcpy(uninitialized, rawInstance, elementBytes);
         }
       }
     } else { // one at a time with subscription
       for (Elementwise::Advance(); !Elementwise::IsComplete();
           Elementwise::Advance()) {
         char *element{instance_.Element<char>(subscripts_)};
-        std::memcpy(element, rawInstance, elementBytes);
+        runtime::memcpy(element, rawInstance, elementBytes);
       }
     }
   }
diff --git a/flang-rt/lib/runtime/descriptor-io.cpp b/flang-rt/lib/runtime/descriptor-io.cpp
index 9986be4c3957e..e00072510aff7 100644
--- a/flang-rt/lib/runtime/descriptor-io.cpp
+++ b/flang-rt/lib/runtime/descriptor-io.cpp
@@ -65,7 +65,7 @@ static RT_API_ATTRS common::optional<bool> DefinedFormattedIo(
     if (edit.descriptor == DataEdit::DefinedDerivedType) {
       ioType[0] = 'D';
       ioType[1] = 'T';
-      std::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars);
+      runtime::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars);
     } else {
       runtime::strcpy(
           ioType, io.mutableModes().inNamelist ? "NAMELIST" : "LISTDIRECTED");
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 882870a570428..e4b5bd2e74dbf 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -15,6 +15,7 @@
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/type-info.h"
 #include "flang/Common/type-kinds.h"
+#include "flang/Runtime/freestanding-tools.h"
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
@@ -26,7 +27,7 @@ RT_OFFLOAD_API_GROUP_BEGIN
 RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; }
 
 RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) {
-  std::memcpy(reinterpret_cast<void *>(this), &that, that.SizeInBytes());
+  runtime::memcpy(this, &that, that.SizeInBytes());
   return *this;
 }
 
diff --git a/flang-rt/lib/runtime/edit-input.cpp b/flang-rt/lib/runtime/edit-input.cpp
index 7de5ec35d41c7..6ab546ee59f74 100644
--- a/flang-rt/lib/runtime/edit-input.cpp
+++ b/flang-rt/lib/runtime/edit-input.cpp
@@ -110,7 +110,7 @@ static RT_API_ATTRS bool EditBOZInput(
   io.HandleAbsolutePosition(start);
   remaining.reset();
   // Make a second pass now that the digit count is known
-  std::memset(n, 0, bytes);
+  runtime::memset(n, 0, bytes);
   int increment{isHostLittleEndian ? -1 : 1};
   auto *data{reinterpret_cast<unsigned char *>(n) +
       (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
@@ -283,18 +283,18 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
     auto shft{static_cast<int>(sizeof value - kind)};
     if (!isHostLittleEndian && shft >= 0) {
       auto shifted{value << (8 * shft)};
-      std::memcpy(n, &shifted, kind);
+      runtime::memcpy(n, &shifted, kind);
     } else {
-      std::memcpy(n, &value, kind); // a blank field means zero
+      runtime::memcpy(n, &value, kind); // a blank field means zero
     }
 #else
     auto shft{static_cast<int>(sizeof(value.low())) - kind};
     // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
     if (!isHostLittleEndian && shft >= 0) {
       auto l{value.low() << (8 * shft)};
-      std::memcpy(n, &l, kind);
+      runtime::memcpy(n, &l, kind);
     } else {
-      std::memcpy(n, &value, kind); // a blank field means zero
+      runtime::memcpy(n, &value, kind); // a blank field means zero
     }
 #endif
     io.GotChar(fastField.got());
@@ -1121,7 +1121,7 @@ RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
         --skipChars;
       } else {
         char32_t buffer{0};
-        std::memcpy(&buffer, input, chunkBytes);
+        runtime::memcpy(&buffer, input, chunkBytes);
         if ((sizeof *x == 1 && buffer > 0xff) ||
             (sizeof *x == 2 && buffer > 0xffff)) {
           *x++ = '?';
@@ -1148,7 +1148,7 @@ RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
         chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
         chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
         chunkChars = chunkBytes;
-        std::memcpy(x, input, chunkBytes);
+        runtime::memcpy(x, input, chunkBytes);
         x += chunkBytes;
         lengthChars -= chunkChars;
       }
diff --git a/flang-rt/lib/runtime/extensions.cpp b/flang-rt/lib/runtime/extensions.cpp
index a24810b4f344a..be0eed6f49dc8 100644
--- a/flang-rt/lib/runtime/extensions.cpp
+++ b/flang-rt/lib/runtime/extensions.cpp
@@ -148,7 +148,7 @@ uid_t RTNAME(GetUID)() {
 
 void GetUsernameEnvVar(const char *envName, char *arg, std::int64_t length) {
   Descriptor name{*Descriptor::Create(
-      1, std::strlen(envName) + 1, const_cast<char *>(envName), 0)};
+      1, runtime::strlen(envName) + 1, const_cast<char *>(envName), 0)};
   Descriptor value{*Descriptor::Create(1, length, arg, 0)};
 
   RTNAME(GetEnvVariable)
@@ -172,7 +172,7 @@ void FORTRAN_PROCEDURE_NAME(fdate)(char *arg, std::int64_t length) {
   char str[26];
   // Insufficient space, fill with spaces and return.
   if (length < 24) {
-    std::memset(arg, ' ', length);
+    runtime::memset(arg, ' ', length);
     return;
   }
 
@@ -204,8 +204,8 @@ void FORTRAN_PROCEDURE_NAME(getarg)(
 void FORTRAN_PROCEDURE_NAME(getlog)(char *arg, std::int64_t length) {
 #if _REENTRANT || _POSIX_C_SOURCE >= 199506L
   if (length >= 1 && getlogin_r(arg, length) == 0) {
-    auto loginLen{std::strlen(arg)};
-    std::memset(
+    auto loginLen{runtime::strlen(arg)};
+    runtime::memset(
         arg + loginLen, ' ', static_cast<std::size_t>(length) - loginLen);
     return;
   }
@@ -259,7 +259,7 @@ std::int64_t FORTRAN_PROCEDURE_NAME(access)(const char *name,
   char *newName{nullptr};
   if (name[nameLength - 1] != '\0') {
     newName = static_cast<char *>(std::malloc(nameLength + 1));
-    std::memcpy(newName, name, nameLength);
+    runtime::memcpy(newName, name, nameLength);
     newName[nameLength] = '\0';
     name = newName;
   }
diff --git a/flang-rt/lib/runtime/external-unit.cpp b/flang-rt/lib/runtime/external-unit.cpp
index 42441e59d9bb6..63a93c12eec57 100644
--- a/flang-rt/lib/runtime/external-unit.cpp
+++ b/flang-rt/lib/runtime/external-unit.cpp
@@ -122,7 +122,7 @@ bool ExternalFileUnit::OpenUnit(common::optional<OpenStatus> status,
   bool impliedClose{false};
   if (IsConnected()) {
     bool isSamePath{newPath.get() && path() && pathLength() == newPathLength &&
-        std::memcmp(path(), newPath.get(), newPathLength) == 0};
+        runtime::memcmp(path(), newPath.get(), newPathLength) == 0};
     if (status && *status != OpenStatus::Old && isSamePath) {
       handler.SignalError("OPEN statement for connected unit may not have "
                           "explicit STATUS= other than 'OLD'");
@@ -202,8 +202,8 @@ bool ExternalFileUnit::OpenAnonymousUnit(common::optional<OpenStatus> status,
   std::size_t pathMaxLen{32};
   auto path{SizedNew<char>{handler}(pathMaxLen)};
   std::snprintf(path.get(), pathMaxLen, "fort.%d", unitNumber_);
-  OpenUnit(status, action, position, std::move(path), std::strlen(path.get()),
-      convert, handler);
+  OpenUnit(status, action, position, std::move(path),
+      runtime::strlen(path.get()), convert, handler);
   return IsConnected();
 }
 
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 03e574a8fbff1..9846529665e8b 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -428,7 +428,7 @@ inline RT_API_ATTRS void TypedPartialMaxOrMinLoc(const char *intrinsic,
       CreatePartialReductionResult(result, x,
           Descriptor::BytesFor(TypeCategory::Integer, kind), dim, terminator,
           intrinsic, TypeCode{TypeCategory::Integer, kind});
-      std::memset(
+      runtime::memset(
           result.OffsetElement(), 0, result.Elements() * result.ElementBytes());
       return;
     }
@@ -584,11 +584,11 @@ template <int KIND, bool IS_MAXVAL> class CharacterExtremumAccumulator {
     static_assert(std::is_same_v<A, Type>);
     std::size_t byteSize{array_.ElementBytes()};
     if (extremum_) {
-      std::memcpy(p, extremum_, byteSize);
+      runtime::memcpy(p, extremum_, byteSize);
     } else {
       // Empty array; fill with character 0 for MAXVAL.
       // For MINVAL, set all of the bits.
-      std::memset(p, IS_MAXVAL ? 0 : 255, byteSize);
+      runtime::memset(p, IS_MAXVAL ? 0 : 255, byteSize);
     }
   }
   RT_API_ATTRS bool Accumulate(const Type *x) {
diff --git a/flang-rt/lib/runtime/internal-unit.cpp b/flang-rt/lib/runtime/internal-unit.cpp
index e344b01e8b34e..cdcee2daaec45 100644
--- a/flang-rt/lib/runtime/internal-unit.cpp
+++ b/flang-rt/lib/runtime/internal-unit.cpp
@@ -72,7 +72,7 @@ RT_API_ATTRS bool InternalDescriptorUnit<DIR>::Emit(
       BlankFill(record + furthestPositionInRecord,
           positionInRecord - furthestPositionInRecord);
     }
-    std::memcpy(record + positionInRecord, data, bytes);
+    runtime::memcpy(record + positionInRecord, data, bytes);
     positionInRecord += bytes;
     furthestPositionInRecord = furthestAfter;
     return ok;
diff --git a/flang-rt/lib/runtime/io-error.cpp b/flang-rt/lib/runtime/io-error.cpp
index b350fb66fc25b..0774b014e98fd 100644
--- a/flang-rt/lib/runtime/io-error.cpp
+++ b/flang-rt/lib/runtime/io-error.cpp
@@ -153,7 +153,7 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) {
   } else if (ok) {
     std::size_t copied{Fortran::runtime::strlen(buffer)};
     if (copied < bufferLength) {
-      std::memset(buffer + copied, ' ', bufferLength - copied);
+      runtime::memset(buffer + copied, ' ', bufferLength - copied);
     }
     return true;
   } else {
diff --git a/flang-rt/lib/runtime/matmul-transpose.cpp b/flang-rt/lib/runtime/matmul-transpose.cpp
index c9e21502b629e..789f13c585ec5 100644
--- a/flang-rt/lib/runtime/matmul-transpose.cpp
+++ b/flang-rt/lib/runtime/matmul-transpose.cpp
@@ -62,7 +62,7 @@ inline static RT_API_ATTRS void MatrixTransposedTimesMatrix(
     std::size_t yColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
 
-  std::memset(product, 0, rows * cols * sizeof *product);
+  Fortran::runtime::memset(product, 0, rows * cols * sizeof *product);
   for (SubscriptValue j{0}; j < cols; ++j) {
     for (SubscriptValue i{0}; i < rows; ++i) {
       for (SubscriptValue k{0}; k < n; ++k) {
@@ -132,7 +132,7 @@ inline static RT_API_ATTRS void MatrixTransposedTimesVector(
     SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
     std::size_t xColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
-  std::memset(product, 0, rows * sizeof *product);
+  Fortran::runtime::memset(product, 0, rows * sizeof *product);
   for (SubscriptValue i{0}; i < rows; ++i) {
     for (SubscriptValue k{0}; k < n; ++k) {
       ResultType x_ki;
diff --git a/flang-rt/lib/runtime/matmul.cpp b/flang-rt/lib/runtime/matmul.cpp
index 5acb345725212..d409cb1458c90 100644
--- a/flang-rt/lib/runtime/matmul.cpp
+++ b/flang-rt/lib/runtime/matmul.cpp
@@ -81,7 +81,7 @@ inline RT_API_ATTRS void MatrixTimesMatrix(
     SubscriptValue n, std::size_t xColumnByteStride = 0,
     std::size_t yColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
-  std::memset(product, 0, rows * cols * sizeof *product);
+  Fortran::runtime::memset(product, 0, rows * cols * sizeof *product);
   const XT *RESTRICT xp0{x};
   for (SubscriptValue k{0}; k < n; ++k) {
     ResultType *RESTRICT p{product};
@@ -153,7 +153,7 @@ inline RT_API_ATTRS void MatrixTimesVector(
     SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
     std::size_t xColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
-  std::memset(product, 0, rows * sizeof *product);
+  Fortran::runtime::memset(product, 0, rows * sizeof *product);
   [[maybe_unused]] const XT *RESTRICT xp0{x};
   for (SubscriptValue k{0}; k < n; ++k) {
     ResultType *RESTRICT p{product};
@@ -203,7 +203,7 @@ inline RT_API_ATTRS void VectorTimesMatrix(
     SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
     std::size_t yColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
-  std::memset(product, 0, cols * sizeof *product);
+  Fortran::runtime::memset(product, 0, cols * sizeof *product);
   for (SubscriptValue k{0}; k < n; ++k) {
     ResultType *RESTRICT p{product};
     auto xv{static_cast<ResultType>(*x++)};
diff --git a/flang-rt/lib/runtime/misc-intrinsic.cpp b/flang-rt/lib/runtime/misc-intrinsic.cpp
index 02862918a3457..4d1165f25687c 100644
--- a/flang-rt/lib/runtime/misc-intrinsic.cpp
+++ b/flang-rt/lib/runtime/misc-intrinsic.cpp
@@ -42,14 +42,14 @@ static RT_API_ATTRS void TransferImpl(Descriptor &result,
   source.GetLowerBounds(sourceAt);
   while (resultBytes > 0 && sourceElements > 0) {
     std::size_t toMove{std::min(resultBytes, sourceElementBytes)};
-    std::memcpy(to, source.Element<char>(sourceAt), toMove);
+    runtime::memcpy(to, source.Element<char>(sourceAt), toMove);
     to += toMove;
     resultBytes -= toMove;
     --sourceElements;
     source.IncrementSubscripts(sourceAt);
   }
   if (resultBytes > 0) {
-    std::memset(to, 0, resultBytes);
+    runtime::memset(to, 0, resultBytes);
   }
 }
 
diff --git a/flang-rt/lib/runtime/pseudo-unit.cpp b/flang-rt/lib/runtime/pseudo-unit.cpp
index e9187c7e7dd56..8887ac681976f 100644
--- a/flang-rt/lib/runtime/pseudo-unit.cpp
+++ b/flang-rt/lib/runtime/pseudo-unit.cpp
@@ -130,7 +130,7 @@ std::size_t PseudoOpenFile::Write(FileOffset at, const char *buffer,
   // TODO: use persistent string buffer that can be reallocated
   // as needed, and only freed at destruction of *this.
   auto string{SizedNew<char>{handler}(bytes + 1)};
-  std::memcpy(string.get(), buffer, bytes);
+  runtime::memcpy(string.get(), buffer, bytes);
   string.get()[bytes] = '\0';
   std::printf("%s", string.get());
   return bytes;
diff --git a/flang-rt/lib/runtime/ragged.cpp b/flang-rt/lib/runtime/ragged.cpp
index dddc3ccdfd858..f28e9b5222fca 100644
--- a/flang-rt/lib/runtime/ragged.cpp
+++ b/flang-rt/lib/runtime/ragged.cpp
@@ -40,7 +40,7 @@ RT_API_ATTRS RaggedArrayHeader *RaggedArrayAllocate(RaggedArrayHeader *header,
     std::size_t bytes{static_cast<std::size_t>(elementSize * size)};
     header->bufferPointer = AllocateMemoryOrCrash(terminator, bytes);
     if (header->bufferPointer) {
-      std::memset(header->bufferPointer, 0, bytes);
+      runtime::memset(header->bufferPointer, 0, bytes);
     }
     return header;
   } else {
diff --git a/flang-rt/lib/runtime/reduce.cpp b/flang-rt/lib/runtime/reduce.cpp
index 3c5e815e32d2b..778600b4b4fa8 100644
--- a/flang-rt/lib/runtime/reduce.cpp
+++ b/flang-rt/lib/runtime/reduce.cpp
@@ -79,16 +79,16 @@ class BufferedReduceAccumulator {
       activeTemp_ = 1 - activeTemp_;
     } else {
       activeTemp_ = 0;
-      std::memcpy(&*temp_[activeTemp_], operand, elementBytes_);
+      runtime::memcpy(&*temp_[activeTemp_], operand, elementBytes_);
     }
     return true;
   }
   template <typename A>
   RT_API_ATTRS void GetResult(A *to, int /*zeroBasedDim*/ = -1) {
     if (activeTemp_ >= 0) {
-      std::memcpy(to, &*temp_[activeTemp_], elementBytes_);
+      runtime::memcpy(to, &*temp_[activeTemp_], elementBytes_);
     } else if (identity_) {
-      std::memcpy(to, identity_, elementBytes_);
+      runtime::memcpy(to, identity_, elementBytes_);
     } else {
       terminator_.Crash("REDUCE() without IDENTITY= has no result");
     }
diff --git a/flang-rt/lib/runtime/stat.cpp b/flang-rt/lib/runtime/stat.cpp
index 322b7282b7024..1d4aae2e49736 100644
--- a/flang-rt/lib/runtime/stat.cpp
+++ b/flang-rt/lib/runtime/stat.cpp
@@ -84,10 +84,10 @@ RT_API_ATTRS int ToErrmsg(const Descriptor *errmsg, int stat) {
       std::size_t bufferLength{errmsg->ElementBytes()};
       std::size_t msgLength{Fortran::runtime::strlen(msg)};
       if (msgLength >= bufferLength) {
-        std::memcpy(buffer, msg, bufferLength);
+        runtime::memcpy(buffer, msg, bufferLength);
       } else {
-        std::memcpy(buffer, msg, msgLength);
-        std::memset(buffer + msgLength, ' ', bufferLength - msgLength);
+        runtime::memcpy(buffer, msg, msgLength);
+        runtime::memset(buffer + msgLength, ' ', bufferLength - msgLength);
       }
     }
   }
diff --git a/flang-rt/lib/runtime/temporary-stack.cpp b/flang-rt/lib/runtime/temporary-stack.cpp
index 3f6fd8ee15a80..4bc161f83b29a 100644
--- a/flang-rt/lib/runtime/temporary-stack.cpp
+++ b/flang-rt/lib/runtime/temporary-stack.cpp
@@ -16,8 +16,11 @@
 #include "flang/Common/ISO_Fortran_binding_wrapper.h"
 #include "flang/Runtime/assign.h"
 
+RT_OFFLOAD_API_GROUP_BEGIN
+
 namespace {
 
+using namespace Fortran;
 using namespace Fortran::runtime;
 
 // the number of elements to allocate when first creating the vector
@@ -97,7 +100,7 @@ void DescriptorStorage<COPY_VALUES>::resize(size_type newCapacity) {
   // Avoid passing a null pointer, since it would result in an undefined
   // behavior.
   if (data_ != nullptr) {
-    memcpy(newData, data_, capacity_ * sizeof(Descriptor *));
+    runtime::memcpy(newData, data_, capacity_ * sizeof(Descriptor *));
     FreeMemory(data_);
   }
   data_ = newData;
@@ -181,8 +184,11 @@ inline static DescriptorStack *getDescriptorStorage(void *opaquePtr) {
   return static_cast<DescriptorStack *>(opaquePtr);
 }
 
+RT_OFFLOAD_API_GROUP_END
+
 namespace Fortran::runtime {
 extern "C" {
+RT_EXT_API_GROUP_BEGIN
 void *RTNAME(CreateValueStack)(const char *sourceFile, int line) {
   return ValueStack::allocate(sourceFile, line);
 }
@@ -222,6 +228,6 @@ void RTNAME(DescriptorAt)(void *opaquePtr, uint64_t i, Descriptor &value) {
 void RTNAME(DestroyDescriptorStack)(void *opaquePtr) {
   DescriptorStack::destroy(getDescriptorStorage(opaquePtr));
 }
-
+RT_EXT_API_GROUP_END
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang-rt/lib/runtime/time-intrinsic.cpp b/flang-rt/lib/runtime/time-intrinsic.cpp
index 8988817a40064..a26bf1f2fa30c 100644
--- a/flang-rt/lib/runtime/time-intrinsic.cpp
+++ b/flang-rt/lib/runtime/time-intrinsic.cpp
@@ -44,6 +44,9 @@
 // should be preferred. Any other parameters required for SFINAE should have
 // default values provided.
 namespace {
+
+using namespace Fortran;
+
 // Types for the dummy parameter indicating the priority of a given overload.
 // We will invoke our helper with an integer literal argument, so the overload
 // with the highest priority should have the type int.
@@ -276,13 +279,13 @@ static void DateAndTimeUnavailable(Fortran::runtime::Terminator &terminator,
     char *zone, std::size_t zoneChars,
     const Fortran::runtime::Descriptor *values) {
   if (date) {
-    std::memset(date, static_cast<int>(' '), dateChars);
+    runtime::memset(date, static_cast<int>(' '), dateChars);
   }
   if (time) {
-    std::memset(time, static_cast<int>(' '), timeChars);
+    runtime::memset(time, static_cast<int>(' '), timeChars);
   }
   if (zone) {
-    std::memset(zone, static_cast<int>(' '), zoneChars);
+    runtime::memset(zone, static_cast<int>(' '), zoneChars);
   }
   if (values) {
     auto typeCode{values->type().GetCategoryAndKind()};
@@ -420,7 +423,7 @@ static void GetDateAndTime(Fortran::runtime::Terminator &terminator, char *date,
   auto copyBufferAndPad{
       [&](char *dest, std::size_t destChars, std::size_t len) {
         auto copyLen{std::min(len, destChars)};
-        std::memcpy(dest, buffer, copyLen);
+        runtime::memcpy(dest, buffer, copyLen);
         for (auto i{copyLen}; i < destChars; ++i) {
           dest[i] = ' ';
         }
@@ -525,8 +528,8 @@ void RTNAME(Etime)(const Descriptor *values, const Descriptor *time,
     ULARGE_INTEGER userSystemTime;
     ULARGE_INTEGER kernelSystemTime;
 
-    memcpy(&userSystemTime, &userTime, sizeof(FILETIME));
-    memcpy(&kernelSystemTime, &kernelTime, sizeof(FILETIME));
+    runtime::memcpy(&userSystemTime, &userTime, sizeof(FILETIME));
+    runtime::memcpy(&kernelSystemTime, &kernelTime, sizeof(FILETIME));
 
     usrTime = ((double)(userSystemTime.QuadPart)) / 10000000.0;
     sysTime = ((double)(kernelSystemTime.QuadPart)) / 10000000.0;
diff --git a/flang-rt/lib/runtime/tools.cpp b/flang-rt/lib/runtime/tools.cpp
index 24d05f369fcbe..03ee982d913bb 100644
--- a/flang-rt/lib/runtime/tools.cpp
+++ b/flang-rt/lib/runtime/tools.cpp
@@ -28,7 +28,7 @@ RT_API_ATTRS OwningPtr<char> SaveDefaultCharacter(
     const char *s, std::size_t length, const Terminator &terminator) {
   if (s) {
     auto *p{static_cast<char *>(AllocateMemoryOrCrash(terminator, length + 1))};
-    std::memcpy(p, s, length);
+    runtime::memcpy(p, s, length);
     p[length] = '\0';
     return OwningPtr<char>{p};
   } else {
@@ -75,10 +75,10 @@ RT_API_ATTRS void ToFortranDefaultCharacter(
     char *to, std::size_t toLength, const char *from) {
   std::size_t len{Fortran::runtime::strlen(from)};
   if (len < toLength) {
-    std::memcpy(to, from, len);
-    std::memset(to + len, ' ', toLength - len);
+    runtime::memcpy(to, from, len);
+    runtime::memset(to + len, ' ', toLength - len);
   } else {
-    std::memcpy(to, from, toLength);
+    runtime::memcpy(to, from, toLength);
   }
 }
 
@@ -127,10 +127,10 @@ RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
       toIt.Advance(), fromIt.Advance()) {
     // typeElementBytes == 1 when P is a char - the non-specialised case
     if constexpr (typeElementBytes != 1) {
-      std::memcpy(
+      runtime::memcpy(
           toIt.template Get<P>(), fromIt.template Get<P>(), typeElementBytes);
     } else {
-      std::memcpy(
+      runtime::memcpy(
           toIt.template Get<P>(), fromIt.template Get<P>(), elementBytes);
     }
   }
@@ -150,9 +150,9 @@ RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
   for (std::size_t n{to.Elements()}; n-- > 0;
       toAt += elementBytes, fromIt.Advance()) {
     if constexpr (typeElementBytes != 1) {
-      std::memcpy(toAt, fromIt.template Get<P>(), typeElementBytes);
+      runtime::memcpy(toAt, fromIt.template Get<P>(), typeElementBytes);
     } else {
-      std::memcpy(toAt, fromIt.template Get<P>(), elementBytes);
+      runtime::memcpy(toAt, fromIt.template Get<P>(), elementBytes);
     }
   }
 }
@@ -170,9 +170,9 @@ RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
   for (std::size_t n{to.Elements()}; n-- > 0;
       toIt.Advance(), fromAt += elementBytes) {
     if constexpr (typeElementBytes != 1) {
-      std::memcpy(toIt.template Get<P>(), fromAt, typeElementBytes);
+      runtime::memcpy(toIt.template Get<P>(), fromAt, typeElementBytes);
     } else {
-      std::memcpy(toIt.template Get<P>(), fromAt, elementBytes);
+      runtime::memcpy(toIt.template Get<P>(), fromAt, elementBytes);
     }
   }
 }
@@ -187,7 +187,7 @@ RT_API_ATTRS void ShallowCopyInner(const Descriptor &to, const Descriptor &from,
     bool toIsContiguous, bool fromIsContiguous) {
   if (toIsContiguous) {
     if (fromIsContiguous) {
-      std::memcpy(to.OffsetElement(), from.OffsetElement(),
+      runtime::memcpy(to.OffsetElement(), from.OffsetElement(),
           to.Elements() * to.ElementBytes());
     } else {
       ShallowCopyDiscontiguousToContiguous<P, RANK>(to, from);
@@ -277,7 +277,7 @@ RT_API_ATTRS char *EnsureNullTerminated(
     char *str, std::size_t length, Terminator &terminator) {
   if (runtime::memchr(str, '\0', length) == nullptr) {
     char *newCmd{(char *)AllocateMemoryOrCrash(terminator, length + 1)};
-    std::memcpy(newCmd, str, length);
+    runtime::memcpy(newCmd, str, length);
     newCmd[length] = '\0';
     return newCmd;
   } else {
@@ -309,7 +309,7 @@ RT_API_ATTRS std::int32_t CopyCharsToDescriptor(const Descriptor &value,
     return ToErrmsg(errmsg, StatValueTooShort);
   }
 
-  std::memcpy(value.OffsetElement(offset), rawValue, toCopy);
+  runtime::memcpy(value.OffsetElement(offset), rawValue, toCopy);
 
   if (static_cast<std::int64_t>(rawValueLength) > toCopy) {
     return ToErrmsg(errmsg, StatValueTooShort);
diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp
index 3df314a4e966b..1869bfeb077aa 100644
--- a/flang-rt/lib/runtime/transformational.cpp
+++ b/flang-rt/lib/runtime/transformational.cpp
@@ -115,7 +115,7 @@ static RT_API_ATTRS void DefaultInitialize(
           "not yet implemented: CHARACTER(KIND=%d) in EOSHIFT intrinsic", kind);
     }
   } else {
-    std::memset(result.raw().base_addr, 0, bytes);
+    runtime::memset(result.raw().base_addr, 0, bytes);
   }
 }
 
diff --git a/flang-rt/lib/runtime/unit-map.cpp b/flang-rt/lib/runtime/unit-map.cpp
index 8fb0e8fd3f8f7..aa475d51b1648 100644
--- a/flang-rt/lib/runtime/unit-map.cpp
+++ b/flang-rt/lib/runtime/unit-map.cpp
@@ -118,7 +118,7 @@ ExternalFileUnit *UnitMap::Find(const char *path, std::size_t pathLen) {
     for (int j{0}; j < buckets_; ++j) {
       for (Chain *p{bucket_[j].get()}; p; p = p->next.get()) {
         if (p->unit.path() && p->unit.pathLength() == pathLen &&
-            std::memcmp(p->unit.path(), path, pathLen) == 0) {
+            runtime::memcmp(p->unit.path(), path, pathLen) == 0) {
           return &p->unit;
         }
       }
diff --git a/flang-rt/lib/runtime/unit.cpp b/flang-rt/lib/runtime/unit.cpp
index 5f52fa2781db5..da3783417f234 100644
--- a/flang-rt/lib/runtime/unit.cpp
+++ b/flang-rt/lib/runtime/unit.cpp
@@ -90,11 +90,11 @@ bool ExternalFileUnit::Emit(const char *data, std::size_t bytes,
   CheckDirectAccess(handler);
   WriteFrame(frameOffsetInFile_, recordOffsetInFrame_ + furthestAfter, handler);
   if (positionInRecord > furthestPositionInRecord) {
-    std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord, ' ',
-        positionInRecord - furthestPositionInRecord);
+    runtime::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord,
+        ' ', positionInRecord - furthestPositionInRecord);
   }
   char *to{Frame() + recordOffsetInFrame_ + positionInRecord};
-  std::memcpy(to, data, bytes);
+  runtime::memcpy(to, data, bytes);
   if (swapEndianness_) {
     SwapEndianness(to, bytes, elementBytes);
   }
@@ -119,7 +119,8 @@ bool ExternalFileUnit::Receive(char *data, std::size_t bytes,
   auto need{recordOffsetInFrame_ + furthestAfter};
   auto got{ReadFrame(frameOffsetInFile_, need, handler)};
   if (got >= need) {
-    std::memcpy(data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes);
+    runtime::memcpy(
+        data, Frame() + recordOffsetInFrame_ + positionInRecord, bytes);
     if (swapEndianness_) {
       SwapEndianness(data, bytes, elementBytes);
     }
@@ -310,7 +311,8 @@ bool ExternalFileUnit::AdvanceRecord(IoErrorHandler &handler) {
         // Pad remainder of fixed length record
         WriteFrame(
             frameOffsetInFile_, recordOffsetInFrame_ + *openRecl, handler);
-        std::memset(Frame() + recordOffsetInFrame_ + furthestPositionInRecord,
+        runtime::memset(
+            Frame() + recordOffsetInFrame_ + furthestPositionInRecord,
             isUnformatted.value_or(false) ? 0 : ' ',
             *openRecl - furthestPositionInRecord);
         furthestPositionInRecord = *openRecl;
@@ -839,7 +841,7 @@ void ExternalFileUnit::PopChildIo(ChildIo &child) {
 std::uint32_t ExternalFileUnit::ReadHeaderOrFooter(std::int64_t frameOffset) {
   std::uint32_t word;
   char *wordPtr{reinterpret_cast<char *>(&word)};
-  std::memcpy(wordPtr, Frame() + frameOffset, sizeof word);
+  runtime::memcpy(wordPtr, Frame() + frameOffset, sizeof word);
   if (swapEndianness_) {
     SwapEndianness(wordPtr, sizeof word, sizeof word);
   }
diff --git a/flang-rt/lib/runtime/work-queue.cpp b/flang-rt/lib/runtime/work-queue.cpp
index 42dbc9064b03b..9ae751ae3367a 100644
--- a/flang-rt/lib/runtime/work-queue.cpp
+++ b/flang-rt/lib/runtime/work-queue.cpp
@@ -14,7 +14,7 @@
 
 namespace Fortran::runtime {
 
-#if !defined(RT_DEVICE_COMPILATION)
+#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
 // FLANG_RT_DEBUG code is disabled when false.
 static constexpr bool enableDebugOutput{false};
 #endif
@@ -79,7 +79,7 @@ RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
     last_ = newTicket;
   }
   newTicket->ticket.begun = false;
-#if !defined(RT_DEVICE_COMPILATION)
+#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
   if (enableDebugOutput &&
       (executionEnvironment.internalDebugging &
           ExecutionEnvironment::WorkQueue)) {
@@ -93,7 +93,7 @@ RT_API_ATTRS int WorkQueue::Run() {
   while (last_) {
     TicketList *at{last_};
     insertAfter_ = last_;
-#if !defined(RT_DEVICE_COMPILATION)
+#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
     if (enableDebugOutput &&
         (executionEnvironment.internalDebugging &
             ExecutionEnvironment::WorkQueue)) {
@@ -102,7 +102,7 @@ RT_API_ATTRS int WorkQueue::Run() {
     }
 #endif
     int stat{at->ticket.Continue(*this)};
-#if !defined(RT_DEVICE_COMPILATION)
+#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
     if (enableDebugOutput &&
         (executionEnvironment.internalDebugging &
             ExecutionEnvironment::WorkQueue)) {
diff --git a/flang/include/flang/Common/Fortran-consts.h b/flang/include/flang/Common/Fortran-consts.h
index 74ef1c85d2c86..466fc8a5cf4bb 100644
--- a/flang/include/flang/Common/Fortran-consts.h
+++ b/flang/include/flang/Common/Fortran-consts.h
@@ -9,6 +9,7 @@
 #ifndef FORTRAN_COMMON_FORTRAN_CONSTS_H_
 #define FORTRAN_COMMON_FORTRAN_CONSTS_H_
 
+#include "api-attrs.h"
 #include "enum-class.h"
 #include <cstdint>
 
@@ -27,8 +28,10 @@ ENUM_CLASS(IoStmtKind, None, Backspace, Close, Endfile, Flush, Inquire, Open,
 ENUM_CLASS(
     DefinedIo, ReadFormatted, ReadUnformatted, WriteFormatted, WriteUnformatted)
 
+RT_OFFLOAD_VAR_GROUP_BEGIN
 // Fortran arrays may have up to 15 dimensions (See Fortran 2018 section 5.4.6).
 static constexpr int maxRank{15};
+RT_OFFLOAD_VAR_GROUP_END
 
 // Floating-point rounding modes; these are packed into a byte to save
 // room in the runtime's format processing context structure.  These
diff --git a/flang/include/flang/Common/constexpr-bitset.h b/flang/include/flang/Common/constexpr-bitset.h
index 1aafb6eff84c6..e60ff520ec63a 100644
--- a/flang/include/flang/Common/constexpr-bitset.h
+++ b/flang/include/flang/Common/constexpr-bitset.h
@@ -21,7 +21,7 @@
 #include <type_traits>
 
 namespace Fortran::common {
-
+RT_OFFLOAD_VAR_GROUP_BEGIN
 template <int BITS> class BitSet {
   static_assert(BITS > 0 && BITS <= 128);
   using Word = HostUnsignedIntType<(BITS <= 32 ? 32 : BITS)>;
@@ -143,5 +143,6 @@ template <int BITS> class BitSet {
 private:
   Word bits_{0};
 };
+RT_OFFLOAD_VAR_GROUP_END
 } // namespace Fortran::common
 #endif // FORTRAN_COMMON_CONSTEXPR_BITSET_H_
diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h
index 1e0cde97d98e6..380ba958eae62 100644
--- a/flang/include/flang/Decimal/binary-floating-point.h
+++ b/flang/include/flang/Decimal/binary-floating-point.h
@@ -15,6 +15,7 @@
 #include "flang/Common/api-attrs.h"
 #include "flang/Common/real.h"
 #include "flang/Common/uint128.h"
+#include "flang/Runtime/freestanding-tools.h"
 #include <cinttypes>
 #include <climits>
 #include <cstring>
@@ -32,6 +33,7 @@ enum FortranRounding {
 
 template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
 public:
+  RT_OFFLOAD_VAR_GROUP_BEGIN
   static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
   static constexpr int binaryPrecision{BINARY_PRECISION};
   static constexpr int bits{realChars.bits};
@@ -47,7 +49,6 @@ template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
 
   using RawType = common::HostUnsignedIntType<bits>;
   static_assert(CHAR_BIT * sizeof(RawType) >= bits);
-  RT_OFFLOAD_VAR_GROUP_BEGIN
   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
 
   constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
@@ -68,7 +69,7 @@ template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
   template <typename A>
   explicit constexpr RT_API_ATTRS BinaryFloatingPointNumber(A x) {
     static_assert(sizeof raw_ <= sizeof x);
-    std::memcpy(reinterpret_cast<void *>(&raw_),
+    runtime::memcpy(reinterpret_cast<void *>(&raw_),
         reinterpret_cast<const void *>(&x), sizeof raw_);
   }
 
diff --git a/flang/include/flang/Runtime/allocator-registry-consts.h b/flang/include/flang/Runtime/allocator-registry-consts.h
index 70735c2fc7a71..a5f52749e945d 100644
--- a/flang/include/flang/Runtime/allocator-registry-consts.h
+++ b/flang/include/flang/Runtime/allocator-registry-consts.h
@@ -9,6 +9,8 @@
 #ifndef FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_
 #define FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_
 
+RT_OFFLOAD_VAR_GROUP_BEGIN
+
 static constexpr unsigned kDefaultAllocator = 0;
 
 // Allocator used for CUF
@@ -17,4 +19,6 @@ static constexpr unsigned kDeviceAllocatorPos = 2;
 static constexpr unsigned kManagedAllocatorPos = 3;
 static constexpr unsigned kUnifiedAllocatorPos = 4;
 
+RT_OFFLOAD_VAR_GROUP_END
+
 #endif /* FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_CONSTS_H_ */
diff --git a/flang/include/flang/Runtime/freestanding-tools.h b/flang/include/flang/Runtime/freestanding-tools.h
index 3a492c1f320d0..bb51c3801a7d7 100644
--- a/flang/include/flang/Runtime/freestanding-tools.h
+++ b/flang/include/flang/Runtime/freestanding-tools.h
@@ -63,6 +63,25 @@
 #define STD_TOUPPER_UNSUPPORTED 1
 #endif
 
+#if defined(OMP_OFFLOAD_BUILD) && defined(OMP_NOHOST_BUILD) && \
+    defined(__clang__)
+#define STD_FILL_N_UNSUPPORTED 1
+#define STD_MEMSET_USE_BUILTIN 1
+#define STD_MEMSET_UNSUPPORTED 1
+#define STD_MEMCPY_USE_BUILTIN 1
+#define STD_MEMCPY_UNSUPPORTED 1
+#define STD_MEMMOVE_UNSUPPORTED 1
+#define STD_STRLEN_UNSUPPORTED 1
+#define STD_MEMCMP_UNSUPPORTED 1
+#define STD_REALLOC_UNSUPPORTED 1
+#define STD_MEMCHR_UNSUPPORTED 1
+#define STD_STRCPY_UNSUPPORTED 1
+#define STD_STRCMP_UNSUPPORTED 1
+#define STD_TOUPPER_UNSUPPORTED 1
+#define STD_ABORT_USE_BUILTIN 1
+#define STD_ABORT_UNSUPPORTED 1
+#endif
+
 namespace Fortran::runtime {
 
 #if STD_FILL_N_UNSUPPORTED
@@ -79,7 +98,51 @@ fill_n(A *start, std::size_t count, const B &value) {
 using std::fill_n;
 #endif // !STD_FILL_N_UNSUPPORTED
 
-#if STD_MEMMOVE_UNSUPPORTED
+#if STD_MEMSET_USE_BUILTIN
+static inline RT_API_ATTRS void memset(
+    void *dest, unsigned char value, std::size_t count) {
+  __builtin_memset(dest, value, count);
+}
+#elif STD_MEMSET_UNSUPPORTED
+static inline RT_API_ATTRS void memset(
+    void *dest, unsigned char value, std::size_t count) {
+  char *to{reinterpret_cast<char *>(dest)};
+  while (count--) {
+    *to++ = value;
+  }
+  return;
+}
+#else
+using std::memset;
+#endif
+
+#if STD_MEMCPY_USE_BUILTIN
+static inline RT_API_ATTRS void memcpy(
+    void *dest, const void *src, std::size_t count) {
+  __builtin_memcpy(dest, src, count);
+}
+#elif STD_MEMCPY_UNSUPPORTED
+static inline RT_API_ATTRS void memcpy(
+    void *dest, const void *src, std::size_t count) {
+  char *to{reinterpret_cast<char *>(dest)};
+  const char *from{reinterpret_cast<const char *>(src)};
+  if (to == from) {
+    return;
+  }
+  while (count--) {
+    *to++ = *from++;
+  }
+}
+#else
+using std::memcpy;
+#endif
+
+#if STD_MEMMOVE_USE_BUILTIN
+static inline RT_API_ATTRS void *memmove(
+    void *dest, const void *src, std::size_t count) {
+  return __builtin_memmove(dest, src, count);
+}
+#elif STD_MEMMOVE_UNSUPPORTED
 // Provides alternative implementation for std::memmove(), if
 // it is not supported.
 static inline RT_API_ATTRS void *memmove(
@@ -91,7 +154,7 @@ static inline RT_API_ATTRS void *memmove(
     return dest;
   }
   if (to + count <= from || from + count <= to) {
-    std::memcpy(dest, src, count);
+    memcpy(dest, src, count);
   } else if (to < from) {
     while (count--) {
       *to++ = *from++;
@@ -112,13 +175,17 @@ using std::memmove;
 using MemmoveFct = void *(*)(void *, const void *, std::size_t);
 
 #ifdef RT_DEVICE_COMPILATION
-static RT_API_ATTRS void *MemmoveWrapper(
+[[maybe_unused]] static RT_API_ATTRS void *MemmoveWrapper(
     void *dest, const void *src, std::size_t count) {
   return Fortran::runtime::memmove(dest, src, count);
 }
 #endif
 
-#if STD_STRLEN_UNSUPPORTED
+#if STD_STRLEN_USE_BUILTIN
+static inline RT_API_ATTRS std::size_t strlen(const char *str) {
+  return __builtin_strlen(str);
+}
+#elif STD_STRLEN_UNSUPPORTED
 // Provides alternative implementation for std::strlen(), if
 // it is not supported.
 static inline RT_API_ATTRS std::size_t strlen(const char *str) {
diff --git a/flang/include/flang/Runtime/stop.h b/flang/include/flang/Runtime/stop.h
index 4ddc5cf49ec8f..81c28904efcbe 100644
--- a/flang/include/flang/Runtime/stop.h
+++ b/flang/include/flang/Runtime/stop.h
@@ -30,7 +30,9 @@ NORETURN void RTNAME(ProgramEndStatement)(NO_ARGUMENTS);
 
 // Extensions
 NORETURN void RTNAME(Exit)(int status DEFAULT_VALUE(EXIT_SUCCESS));
+RT_OFFLOAD_API_GROUP_BEGIN
 NORETURN void RTNAME(Abort)(NO_ARGUMENTS);
+RT_OFFLOAD_API_GROUP_END
 void FORTRAN_PROCEDURE_NAME(backtrace)(NO_ARGUMENTS);
 
 // Crash with an error message when the program dynamically violates a Fortran

>From 735d142704dc90bd21f8d34aa5151616f28cab98 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 29 Aug 2025 21:28:33 +0000
Subject: [PATCH 5/6] [fuzzer] Fix-forward fix-forward CrossOverTest.cpp

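https://github.com/llvm/llvm-project/pull/156103 was missing the return
value: the early exit for Size == 0 used a bare `return;` even though
LLVMFuzzerTestOneInput returns int.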
---
 compiler-rt/test/fuzzer/CrossOverTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/fuzzer/CrossOverTest.cpp b/compiler-rt/test/fuzzer/CrossOverTest.cpp
index 6d764d0b6a6bd..eb8a8c44ad47e 100644
--- a/compiler-rt/test/fuzzer/CrossOverTest.cpp
+++ b/compiler-rt/test/fuzzer/CrossOverTest.cpp
@@ -47,7 +47,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
     *NullPtr = 0;
   // It's UB to read *Data when Size == 0
   if (Size == 0)
-    return;
+    return 0;
   if (*Data == 'A')
     Sink++;
   if (*Data == 'Z')

>From e1a83a31399fab8c4a3597b77318536f6a33abd2 Mon Sep 17 00:00:00 2001
From: Brock Denson <mbdenson at gmail.com>
Date: Fri, 29 Aug 2025 15:58:47 -0500
Subject: [PATCH 6/6] Accept PR review changes less unreachable

---
 llvm/lib/Support/Mustache.cpp | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 8e852cb6345f3..46a791505b71e 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -168,7 +168,7 @@ class ASTNode {
 
   void indentTextNode(std::string &Body, size_t Indentation, bool FinalNode);
 
-  void indentNodes(ASTNode *Node, bool isPartial);
+  void indentNodes(ASTNode *Node, bool IsPartial);
 
   void renderPartial(const llvm::json::Value &Contexts, llvm::raw_ostream &OS,
                      ASTNode *Partial);
@@ -687,40 +687,41 @@ void ASTNode::renderChild(const json::Value &Contexts, llvm::raw_ostream &OS) {
 
 void ASTNode::indentTextNode(std::string &Body, size_t Indentation,
                              bool FinalNode) {
-  std::string spaces(Indentation, ' ');
-  size_t pos = 0;
+  std::string Spaces(Indentation, ' ');
+  size_t Pos = 0;
   size_t LastChar = std::string::npos;
 
   if (FinalNode)
-    // body.erase(body.find_last_not_of(" \t\r\f\v") + 1);
     LastChar = Body.find_last_not_of(" \t\r\f\v");
 
-  while ((pos = Body.find('\n', pos)) != std::string::npos) {
-    if ((!FinalNode) || (pos != LastChar)) {
-      Body.insert(pos + 1, spaces);
-      pos += 1 + Indentation;
-    } else {
+  while ((Pos = Body.find('\n', Pos)) != std::string::npos) {
+    if (FinalNode && (Pos == LastChar))
       break;
-    }
+
+    Body.insert(Pos + 1, Spaces);
+    Pos += 1 + Indentation;
   }
 }
 
-void ASTNode::indentNodes(ASTNode *Node, bool isPartial) {
-  size_t size = Node->Children.size();
+void ASTNode::indentNodes(ASTNode *Node, bool IsPartial) {
+  size_t Size = Node->Children.size();
 
-  for (size_t i = 0; i < size; ++i) {
-    ASTNode *child = Node->Children[i].get();
-    switch (child->Ty) {
+  for (size_t i = 0; i < Size; ++i) {
+    ASTNode *Child = Node->Children[i].get();
+    switch (Child->Ty) {
     case ASTNode::Text: {
-      indentTextNode(child->Body, Indentation, ((i == size - 1) && isPartial));
+      // Only track the final node for partials.
+      bool IsFinalNode = ((i == Size - 1) && IsPartial);
+      indentTextNode(Child->Body, Indentation, IsFinalNode);
       break;
     }
     case ASTNode::Section: {
-      indentNodes(child, false);
+      indentNodes(Child, false);
       break;
     }
     case ASTNode::Partial: {
-      indentNodes(child, true);
+      indentNodes(Child, true);
+      break;
     }
     case ASTNode::Root:
     case ASTNode::Variable:



More information about the llvm-commits mailing list