[llvm] a6e1700 - [Utils][Local] Preserve !nosanitize in combineMetadata when merging instructions (#148376)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 14 00:45:12 PDT 2025


Author: Kunqiu Chen
Date: 2025-07-14T15:45:08+08:00
New Revision: a6e1700fa622f75436c3c3ead23b1eefb67e88ea

URL: https://github.com/llvm/llvm-project/commit/a6e1700fa622f75436c3c3ead23b1eefb67e88ea
DIFF: https://github.com/llvm/llvm-project/commit/a6e1700fa622f75436c3c3ead23b1eefb67e88ea.diff

LOG: [Utils][Local] Preserve !nosanitize in combineMetadata when merging instructions (#148376)

The `combineMetadata` helper currently drops `!nosanitize` metadata when
merging two instructions, even if both originally carried `!nosanitize`.

This is problematic because `!nosanitize` is a key mechanism used by
sanitizers (e.g., ASan) to suppress instrumentation. Removing it can lead
to unintended sanitizer behavior.

This patch adds `nosanitize` to the whitelist in combineMetadata,
preserving it only if both instructions carry `!nosanitize`; otherwise,
it is dropped. This patch also adds corresponding tests in a test file
and regenerates it.

---
### Details

**Example** (see [Godbolt](https://godbolt.org/z/83P5eWczx) for
details):

```llvm
%v1 = load i32, ptr %p, !nosanitize
%v2 = load i32, ptr %p, !nosanitize
```
When merged via `combineMetadata(%v1, %v2, ...)`, the resulting
instruction loses its `!nosanitize` metadata.

Tools such as UBSan and AFL rely on `nosanitize` to prevent unwanted
transformations or checks. However, the current implementation of
combineMetadata mistakenly drops !nosanitize. This may lead to
unintended behavior during optimization.

For example, under `-fsanitize=address,undefined -O2`, IR emitted by
UBSan may lose its `!nosanitize` metadata due to the incorrect metadata
merging in optimization. As a result, ASan could unexpectedly instrument
those instructions.
> Note: due to the current UBSan handlers having relatively
coarse-grained attributes, this specific case is difficult to reproduce
end-to-end from source code—UBSan currently inhibits such optimizations
(refer to #135135 for details).

Still, I believe it's necessary to fix this now, to support future
versions of UBSan that might allow such optimizations, and to support
third-party tools (such as AFL-based fuzzers) that rely on the presence
of !nosanitize.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/Local.cpp
    llvm/test/Transforms/GVN/metadata.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index f5208d50c6aae..81d85375b9e1d 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3381,7 +3381,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
           K->setMetadata(Kind,
                          MDNode::getMostGenericNoaliasAddrspace(JMD, KMD));
         break;
-    }
+      case LLVMContext::MD_nosanitize:
+        // Preserve !nosanitize if both K and J have it.
+        K->setMetadata(Kind, JMD);
+        break;
+      }
   }
   // Set !invariant.group from J if J has it. If both instructions have it
   // then we will just pick it from J - even when they are different.

diff  --git a/llvm/test/Transforms/GVN/metadata.ll b/llvm/test/Transforms/GVN/metadata.ll
index a5dbb5ee06070..ff055d889eac2 100644
--- a/llvm/test/Transforms/GVN/metadata.ll
+++ b/llvm/test/Transforms/GVN/metadata.ll
@@ -112,7 +112,7 @@ define i32 @test8(ptr %p) {
 define i32 @load_noundef_load(ptr %p) {
 ; CHECK-LABEL: define i32 @load_noundef_load
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef !6
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef [[META6:![0-9]+]]
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -138,7 +138,7 @@ define i32 @load_load_noundef(ptr %p) {
 define void @load_dereferenceable_dominating(ptr %p) {
 ; CHECK-LABEL: define void @load_dereferenceable_dominating
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
+; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7:![0-9]+]]
 ; CHECK-NEXT:    call void @use.ptr(ptr [[A]])
 ; CHECK-NEXT:    call void @use.ptr(ptr [[A]])
 ; CHECK-NEXT:    ret void
@@ -185,7 +185,7 @@ define void @load_ptr_nonnull_to_i64(ptr %p) {
 define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
 ; CHECK-LABEL: define void @load_ptr_nonnull_noundef_to_i64
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull !6, !noundef !6
+; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META6]], !noundef [[META6]]
 ; CHECK-NEXT:    [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
@@ -202,7 +202,7 @@ define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
 define void @load_ptr_invariant_load_to_i64(ptr %p) {
 ; CHECK-LABEL: define void @load_ptr_invariant_load_to_i64
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load !6
+; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load [[META6]]
 ; CHECK-NEXT:    [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
@@ -219,7 +219,7 @@ define void @load_ptr_invariant_load_to_i64(ptr %p) {
 define void @load_ptr_dereferenceable_to_i64(ptr %p) {
 ; CHECK-LABEL: define void @load_ptr_dereferenceable_to_i64
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
+; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7]]
 ; CHECK-NEXT:    [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
@@ -236,7 +236,7 @@ define void @load_ptr_dereferenceable_to_i64(ptr %p) {
 define void @load_ptr_dereferenceable_or_null_to_i64(ptr %p) {
 ; CHECK-LABEL: define void @load_ptr_dereferenceable_or_null_to_i64
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !7
+; CHECK-NEXT:    [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null [[META7]]
 ; CHECK-NEXT:    [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[VAL_INT]])
@@ -409,6 +409,82 @@ join:
   ret void
 }
 
+; We should preserve the !nosanitize if both insns have it.
+define void @test_nosanitize1(ptr %p) {
+; CHECK-LABEL: define void @test_nosanitize1
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P]], align 4, !nosanitize [[META6]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @use.i32(i32 0)
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i32, ptr %p, !nosanitize !11
+  %cond = icmp eq i32 %v1, 0
+  br i1 %cond, label %if, label %join
+
+if:
+  %v2 = load i32, ptr %p, !nosanitize !11
+  call void @use.i32(i32 %v2)
+  br label %join
+
+join:
+  ret void
+}
+
+define void @test_nosanitize2(ptr %p) {
+; CHECK-LABEL: define void @test_nosanitize2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @use.i32(i32 0)
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i32, ptr %p, !nosanitize !11
+  %cond = icmp eq i32 %v1, 0
+  br i1 %cond, label %if, label %join
+
+if:
+  %v2 = load i32, ptr %p
+  call void @use.i32(i32 %v2)
+  br label %join
+
+join:
+  ret void
+}
+
+define void @test_nosanitize3(ptr %p) {
+; CHECK-LABEL: define void @test_nosanitize3
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @use.i32(i32 0)
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    ret void
+;
+  %v1 = load i32, ptr %p
+  %cond = icmp eq i32 %v1, 0
+  br i1 %cond, label %if, label %join
+
+if:
+  %v2 = load i32, ptr %p, !nosanitize !11
+  call void @use.i32(i32 %v2)
+  br label %join
+
+join:
+  ret void
+}
+
 !0 = !{i32 0, i32 2}
 !1 = !{i32 3, i32 5}
 !2 = !{i32 2, i32 5}
@@ -430,8 +506,8 @@ join:
 ; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5}
 ; CHECK: [[RNG4]] = !{i32 10, i32 1}
 ; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2}
-; CHECK: [[META6:![0-9]+]] = !{}
-; CHECK: [[META7:![0-9]+]] = !{i64 10}
+; CHECK: [[META6]] = !{}
+; CHECK: [[META7]] = !{i64 10}
 ; CHECK: [[RNG8]] = !{i64 0, i64 10}
 ; CHECK: [[RNG9]] = !{i64 0, i64 10, i64 20, i64 30}
 ; CHECK: [[RNG10]] = !{i64 10, i64 30}


        


More information about the llvm-commits mailing list