[llvm] [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName` (PR #147444)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 7 19:00:19 PDT 2025


https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/147444

Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the code that happens when the `Name` StringRef passed in is not a Null terminated StringRef, and additionally should potentially speed up the code as well by eliminating the unnecessary computation of string length every time a C String is converted to StringRef in this code (It seems practically this computation is eliminated in optimized builds, but this will avoid it at O0 builds as well).

Added a unit test that demomstrates this issue by building LLVM with these options:

```
-DCMAKE_BUILD_TYPE=Debug
-DLLVM_USE_SANITIZER=Address`
-DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
```

The error reported is as follows:

```
==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428
READ of size 19 at 0x5030000391a2 thread T0
    #0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2)
    #1 0x7f8ff22ad334 in std::char_traits<char>::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9
    #2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33
    #3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18
```

>From 159091e4dc422fc262a8575e4a12b14f2451e554 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Mon, 7 Jul 2025 18:52:48 -0700
Subject: [PATCH] [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName`

Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda
inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the
code that happens when the `Name` StringRef passed in is not a Null
terminated StringRef, and additionally should potentially speed up the
code as well by eliminating the unnecessary computation of string length
every time a C String is converted to StringRef in this code (It seems
practically this computation is eliminated in optimized builds, but this
will avoid it at O0 builds as well).

Added a unit test that demomstrates this issue by building LLVM with
these options:

```
-DCMAKE_BUILD_TYPE=Debug
-DLLVM_USE_SANITIZER=Address`
-DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
```

The error reported is as follows:

```
==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428
READ of size 19 at 0x5030000391a2 thread T0
    #0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2)
    #1 0x7f8ff22ad334 in std::char_traits<char>::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9
    #2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33
    #3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18
```
---
 llvm/include/llvm/ADT/StringTable.h  | 12 ++++++++----
 llvm/lib/IR/Intrinsics.cpp           | 24 ++++++++++++------------
 llvm/unittests/IR/IntrinsicsTest.cpp | 22 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index b3c4a414ed6b4..c089a070d4b57 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -85,14 +85,18 @@ class StringTable {
            "Last byte must be a null byte.");
   }
 
-  // Get a string from the table starting with the provided offset. The returned
-  // `StringRef` is in fact null terminated, and so can be converted safely to a
-  // C-string if necessary for a system API.
-  constexpr StringRef operator[](Offset O) const {
+  // Returns the raw C string from the table starting with the provided offset.
+  // The returned string is null terminated.
+  constexpr const char *getCString(Offset O) const {
     assert(O.value() < Table.size() && "Out of bounds offset!");
     return Table.data() + O.value();
   }
 
+  // Get a string from the table starting with the provided offset. The returned
+  // `StringRef` is in fact null terminated, and so can be converted safely to a
+  // C-string if necessary for a system API.
+  constexpr StringRef operator[](Offset O) const { return getCString(O); }
+
   /// Returns the byte size of the table.
   constexpr size_t size() const { return Table.size(); }
 
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index e631419d5e1c2..68ab15eab1aea 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -660,20 +660,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
       // `equal_range` requires the comparison to work with either side being an
       // offset or the value. Detect which kind each side is to set up the
       // compared strings.
-      StringRef LHSStr;
-      if constexpr (std::is_integral_v<decltype(LHS)>) {
-        LHSStr = IntrinsicNameTable[LHS];
-      } else {
+      const char *LHSStr;
+      if constexpr (std::is_integral_v<decltype(LHS)>)
+        LHSStr = IntrinsicNameTable.getCString(LHS);
+      else
         LHSStr = LHS;
-      }
-      StringRef RHSStr;
-      if constexpr (std::is_integral_v<decltype(RHS)>) {
-        RHSStr = IntrinsicNameTable[RHS];
-      } else {
+
+      const char *RHSStr;
+      if constexpr (std::is_integral_v<decltype(RHS)>)
+        RHSStr = IntrinsicNameTable.getCString(RHS);
+      else
         RHSStr = RHS;
-      }
-      return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart,
-                     CmpEnd - CmpStart) < 0;
+
+      return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
+             0;
     };
     LastLow = Low;
     std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index ad9af2075e371..fff13d51897f8 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -80,6 +80,28 @@ TEST(IntrinsicNameLookup, Basic) {
   EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
 }
 
+TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
+  using namespace Intrinsic;
+  // This reproduces an issue where lookupIntrinsicID() can access memory beyond
+  // the bounds of the passed in StringRef. For ASAN to catch this as an error,
+  // create a StringRef using heap allocated memory and make it not null
+  // terminated.
+
+  // ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in
+  // `lookupLLVMIntrinsicByName` when LLVM is built with these options:
+  //  -DCMAKE_BUILD_TYPE=Debug
+  //  -DLLVM_USE_SANITIZER=Address
+  //  -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
+
+  // Make an intrinsic name "llvm.memcpy.inline" on the heap.
+  std::string Name = "llvm.memcpy.inline";
+  assert(Name.size() == 18);
+  std::unique_ptr<char[]> Data = std::make_unique<char[]>(Name.size());
+  std::strncpy(Data.get(), Name.data(), Name.size());
+  StringRef S(Data.get(), Name.size());
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S));
+}
+
 // Tests to verify getIntrinsicForClangBuiltin.
 TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
   using namespace Intrinsic;



More information about the llvm-commits mailing list