[llvm] 183f1ac - [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName` (#147444)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 11:07:51 PDT 2025
Author: Rahul Joshi
Date: 2025-07-15T11:07:48-07:00
New Revision: 183f1ac412c1a0c69c7dc6aef85c28c0749609cb
URL: https://github.com/llvm/llvm-project/commit/183f1ac412c1a0c69c7dc6aef85c28c0749609cb
DIFF: https://github.com/llvm/llvm-project/commit/183f1ac412c1a0c69c7dc6aef85c28c0749609cb.diff
LOG: [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName` (#147444)
Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda
inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the
code that happens when the `Name` StringRef passed in is not a Null
terminated StringRef, and additionally can potentially speed up the code
as well by eliminating the unnecessary computation of string length
every time a C String is converted to StringRef in this code (It seems
practically this computation is eliminated in optimized builds, but this
will avoid it in O0 builds as well).
Added a unit test that demonstrates this issue by building LLVM with
these options:
```
CMAKE_BUILD_TYPE=Debug
LLVM_USE_SANITIZER=Address
LLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
```
The error reported is as follows:
```
==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428
READ of size 19 at 0x5030000391a2 thread T0
#0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2)
#1 0x7f8ff22ad334 in std::char_traits<char>::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9
#2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33
#3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18
```
Added:
Modified:
llvm/include/llvm/ADT/StringTable.h
llvm/lib/IR/Intrinsics.cpp
llvm/unittests/IR/IntrinsicsTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h
index b3c4a414ed6b4..c089a070d4b57 100644
--- a/llvm/include/llvm/ADT/StringTable.h
+++ b/llvm/include/llvm/ADT/StringTable.h
@@ -85,14 +85,18 @@ class StringTable {
"Last byte must be a null byte.");
}
- // Get a string from the table starting with the provided offset. The returned
- // `StringRef` is in fact null terminated, and so can be converted safely to a
- // C-string if necessary for a system API.
- constexpr StringRef operator[](Offset O) const {
+ // Returns the raw C string from the table starting with the provided offset.
+ // The returned string is null terminated.
+ constexpr const char *getCString(Offset O) const {
assert(O.value() < Table.size() && "Out of bounds offset!");
return Table.data() + O.value();
}
+ // Get a string from the table starting with the provided offset. The returned
+ // `StringRef` is in fact null terminated, and so can be converted safely to a
+ // C-string if necessary for a system API.
+ constexpr StringRef operator[](Offset O) const { return getCString(O); }
+
/// Returns the byte size of the table.
constexpr size_t size() const { return Table.size(); }
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index d2632d50dff06..6c35ade3e57c5 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -661,20 +661,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
// `equal_range` requires the comparison to work with either side being an
// offset or the value. Detect which kind each side is to set up the
// compared strings.
- StringRef LHSStr;
- if constexpr (std::is_integral_v<decltype(LHS)>) {
- LHSStr = IntrinsicNameTable[LHS];
- } else {
+ const char *LHSStr;
+ if constexpr (std::is_integral_v<decltype(LHS)>)
+ LHSStr = IntrinsicNameTable.getCString(LHS);
+ else
LHSStr = LHS;
- }
- StringRef RHSStr;
- if constexpr (std::is_integral_v<decltype(RHS)>) {
- RHSStr = IntrinsicNameTable[RHS];
- } else {
+
+ const char *RHSStr;
+ if constexpr (std::is_integral_v<decltype(RHS)>)
+ RHSStr = IntrinsicNameTable.getCString(RHS);
+ else
RHSStr = RHS;
- }
- return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart,
- CmpEnd - CmpStart) < 0;
+
+ return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
+ 0;
};
LastLow = Low;
std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index ad9af2075e371..49af83609d98c 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -80,6 +80,32 @@ TEST(IntrinsicNameLookup, Basic) {
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
}
+TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
+ using namespace Intrinsic;
+ // This reproduces an issue where lookupIntrinsicID() can access memory beyond
+ // the bounds of the passed in StringRef. For ASAN to catch this as an error,
+ // create a StringRef using heap allocated memory and make it not null
+ // terminated.
+
+ // ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in
+ // `lookupLLVMIntrinsicByName` when LLVM is built with these options:
+ // -DCMAKE_BUILD_TYPE=Debug
+ // -DLLVM_USE_SANITIZER=Address
+ // -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
+
+ // Make an intrinsic name "llvm.memcpy.inline" on the heap.
+ std::string Name = "llvm.memcpy.inline";
+ assert(Name.size() == 18);
+ // Create a StringRef backed by heap allocated memory such that OOB access
+ // in that StringRef can be flagged by asan. Here, the String `S` is of size
+ // 18, and backed by a heap allocated buffer `Data`, so access to S[18] will
+ // be flagged bby asan.
+ auto Data = std::make_unique<char[]>(Name.size());
+ std::strncpy(Data.get(), Name.data(), Name.size());
+ StringRef S(Data.get(), Name.size());
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S));
+}
+
// Tests to verify getIntrinsicForClangBuiltin.
TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
using namespace Intrinsic;
More information about the llvm-commits
mailing list