[llvm] [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName` (PR #147444)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 15 07:21:00 PDT 2025


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/147444

>From 159091e4dc422fc262a8575e4a12b14f2451e554 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Mon, 7 Jul 2025 18:52:48 -0700
Subject: [PATCH 1/3] [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName`

Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda
inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the
code that happens when the `Name` StringRef passed in is not a Null
terminated StringRef, and additionally should potentially speed up the
code as well by eliminating the unnecessary computation of string length
every time a C String is converted to StringRef in this code (It seems
practically this computation is eliminated in optimized builds, but this
will avoid it at O0 builds as well).

Added a unit test that demomstrates this issue by building LLVM with
these options:

```
-DCMAKE_BUILD_TYPE=Debug
-DLLVM_USE_SANITIZER=Address`
-DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
```

The error reported is as follows:

```
==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428
READ of size 19 at 0x5030000391a2 thread T0
    #0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2)
    #1 0x7f8ff22ad334 in std::char_traits<char>::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9
    #2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33
    #3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18
```
---
 llvm/include/llvm/ADT/StringTable.h  | 12 ++++++++----
 llvm/lib/IR/Intrinsics.cpp           | 24 ++++++++++++------------
 llvm/unittests/IR/IntrinsicsTest.cpp | 22 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index b3c4a414ed6b4..c089a070d4b57 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -85,14 +85,18 @@ class StringTable {
            "Last byte must be a null byte.");
   }
 
-  // Get a string from the table starting with the provided offset. The returned
-  // `StringRef` is in fact null terminated, and so can be converted safely to a
-  // C-string if necessary for a system API.
-  constexpr StringRef operator[](Offset O) const {
+  // Returns the raw C string from the table starting with the provided offset.
+  // The returned string is null terminated.
+  constexpr const char *getCString(Offset O) const {
     assert(O.value() < Table.size() && "Out of bounds offset!");
     return Table.data() + O.value();
   }
 
+  // Get a string from the table starting with the provided offset. The returned
+  // `StringRef` is in fact null terminated, and so can be converted safely to a
+  // C-string if necessary for a system API.
+  constexpr StringRef operator[](Offset O) const { return getCString(O); }
+
   /// Returns the byte size of the table.
   constexpr size_t size() const { return Table.size(); }
 
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index e631419d5e1c2..68ab15eab1aea 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -660,20 +660,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
       // `equal_range` requires the comparison to work with either side being an
       // offset or the value. Detect which kind each side is to set up the
       // compared strings.
-      StringRef LHSStr;
-      if constexpr (std::is_integral_v<decltype(LHS)>) {
-        LHSStr = IntrinsicNameTable[LHS];
-      } else {
+      const char *LHSStr;
+      if constexpr (std::is_integral_v<decltype(LHS)>)
+        LHSStr = IntrinsicNameTable.getCString(LHS);
+      else
         LHSStr = LHS;
-      }
-      StringRef RHSStr;
-      if constexpr (std::is_integral_v<decltype(RHS)>) {
-        RHSStr = IntrinsicNameTable[RHS];
-      } else {
+
+      const char *RHSStr;
+      if constexpr (std::is_integral_v<decltype(RHS)>)
+        RHSStr = IntrinsicNameTable.getCString(RHS);
+      else
         RHSStr = RHS;
-      }
-      return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart,
-                     CmpEnd - CmpStart) < 0;
+
+      return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
+             0;
     };
     LastLow = Low;
     std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index ad9af2075e371..fff13d51897f8 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -80,6 +80,28 @@ TEST(IntrinsicNameLookup, Basic) {
   EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
 }
 
+TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
+  using namespace Intrinsic;
+  // This reproduces an issue where lookupIntrinsicID() can access memory beyond
+  // the bounds of the passed in StringRef. For ASAN to catch this as an error,
+  // create a StringRef using heap allocated memory and make it not null
+  // terminated.
+
+  // ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in
+  // `lookupLLVMIntrinsicByName` when LLVM is built with these options:
+  //  -DCMAKE_BUILD_TYPE=Debug
+  //  -DLLVM_USE_SANITIZER=Address
+  //  -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
+
+  // Make an intrinsic name "llvm.memcpy.inline" on the heap.
+  std::string Name = "llvm.memcpy.inline";
+  assert(Name.size() == 18);
+  std::unique_ptr<char[]> Data = std::make_unique<char[]>(Name.size());
+  std::strncpy(Data.get(), Name.data(), Name.size());
+  StringRef S(Data.get(), Name.size());
+  EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S));
+}
+
 // Tests to verify getIntrinsicForClangBuiltin.
 TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
   using namespace Intrinsic;

>From 71ce9522ca6a1b7a76b4149a1d1ce35b0a0c6872 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 8 Jul 2025 10:32:14 -0700
Subject: [PATCH 2/3] Review feedback

---
 llvm/unittests/IR/IntrinsicsTest.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index fff13d51897f8..1183ed08b468e 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -96,6 +96,10 @@ TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
   // Make an intrinsic name "llvm.memcpy.inline" on the heap.
   std::string Name = "llvm.memcpy.inline";
   assert(Name.size() == 18);
+  // Create a StringRef backed by heap allocated memory such that OOB access
+  // in that StringRef can be flagged by asan. Here, the String `S` is of size
+  // 18, and backed by a heap allocated buffer `Data`, so access to S[18] will
+  // be flagged bby asan.
   std::unique_ptr<char[]> Data = std::make_unique<char[]>(Name.size());
   std::strncpy(Data.get(), Name.data(), Name.size());
   StringRef S(Data.get(), Name.size());

>From f44dede373429120035fd15970bfbaa7b1ee373b Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 15 Jul 2025 07:20:30 -0700
Subject: [PATCH 3/3] Review feedback

---
 llvm/unittests/IR/IntrinsicsTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index 1183ed08b468e..49af83609d98c 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -100,7 +100,7 @@ TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
   // in that StringRef can be flagged by asan. Here, the String `S` is of size
   // 18, and backed by a heap allocated buffer `Data`, so access to S[18] will
   // be flagged bby asan.
-  std::unique_ptr<char[]> Data = std::make_unique<char[]>(Name.size());
+  auto Data = std::make_unique<char[]>(Name.size());
   std::strncpy(Data.get(), Name.data(), Name.size());
   StringRef S(Data.get(), Name.size());
   EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S));



More information about the llvm-commits mailing list