[llvm] [Utils][Local] Preserve !nosanitize metadata in combineMetadata when merging instructions (PR #148376)
Kunqiu Chen via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 12 07:53:32 PDT 2025
https://github.com/Camsyn created https://github.com/llvm/llvm-project/pull/148376
The `combineMetadata` helper currently drops `!nosanitize` metadata when merging two instructions, even if both originally carried `!nosanitize`.
This is problematic because `!nosanitize` is a key mechanism used by sanitizers (e.g., ASan) to suppress instrumentation. Removing it can lead to unintended sanitizer behavior.
This patch adds `nosanitize` to the whitelist in `combineMetadata`, preserving it only if both instructions carry `!nosanitize`; otherwise, it is dropped. This patch also adds a corresponding test to `llvm/test/Transforms/GVN/metadata.ll` and regenerates that file's check lines.
---
### Details
**Example** (see [Godbolt](https://godbolt.org/z/83P5eWczx) for details):
```llvm
%v1 = load i32, ptr %p, !nosanitize
%v2 = load i32, ptr %p, !nosanitize
```
When merged via `combineMetadata(%v1, %v2, ...)`, the resulting instruction loses its `!nosanitize` metadata.
Tools such as UBSan and AFL rely on `!nosanitize` to prevent unwanted transformations or checks. However, the current implementation of `combineMetadata` mistakenly drops `!nosanitize`. This may lead to unintended behavior during optimization.
For example, under `-fsanitize=address,undefined -O2`, IR emitted by UBSan may lose its `!nosanitize` metadata because of incorrect metadata merging during optimization. As a result, ASan could unexpectedly instrument those instructions.
> Note: due to the current UBSan handlers having relatively coarse-grained attributes, this specific case is difficult to reproduce end-to-end from source code—UBSan currently inhibits such optimizations (refer to #135135 for details).
Still, I believe it's necessary to fix this now, to support future versions of UBSan that might allow such optimizations, and to support third-party tools (such as AFL-based fuzzers) that rely on the presence of !nosanitize.
From a5c98957a8ecc0b59136d36afe447cbc024ee7e7 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Sat, 12 Jul 2025 21:25:46 +0800
Subject: [PATCH] fix missing handling for nosanitize in combineMetadata | add
test and regenerate the test
---
llvm/lib/Transforms/Utils/Local.cpp | 4 +++
llvm/test/Transforms/GVN/metadata.ll | 41 ++++++++++++++++++++++------
2 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index f5208d50c6aae..6f4760ebaf81a 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3381,6 +3381,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericNoaliasAddrspace(JMD, KMD));
break;
+ case LLVMContext::MD_nosanitize:
+ // Preserve !nosanitize if both K and J have it.
+ K->setMetadata(Kind, JMD);
+ break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
diff --git a/llvm/test/Transforms/GVN/metadata.ll b/llvm/test/Transforms/GVN/metadata.ll
index a5dbb5ee06070..98e3a8fee7cc3 100644
--- a/llvm/test/Transforms/GVN/metadata.ll
+++ b/llvm/test/Transforms/GVN/metadata.ll
@@ -112,7 +112,7 @@ define i32 @test8(ptr %p) {
define i32 @load_noundef_load(ptr %p) {
; CHECK-LABEL: define i32 @load_noundef_load
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef !6
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef [[META6:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@@ -138,7 +138,7 @@ define i32 @load_load_noundef(ptr %p) {
define void @load_dereferenceable_dominating(ptr %p) {
; CHECK-LABEL: define void @load_dereferenceable_dominating
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
+; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7:![0-9]+]]
; CHECK-NEXT: call void @use.ptr(ptr [[A]])
; CHECK-NEXT: call void @use.ptr(ptr [[A]])
; CHECK-NEXT: ret void
@@ -185,7 +185,7 @@ define void @load_ptr_nonnull_to_i64(ptr %p) {
define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_nonnull_noundef_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull !6, !noundef !6
+; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META6]], !noundef [[META6]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@@ -202,7 +202,7 @@ define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
define void @load_ptr_invariant_load_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_invariant_load_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load !6
+; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load [[META6]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@@ -219,7 +219,7 @@ define void @load_ptr_invariant_load_to_i64(ptr %p) {
define void @load_ptr_dereferenceable_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_dereferenceable_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
+; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@@ -236,7 +236,7 @@ define void @load_ptr_dereferenceable_to_i64(ptr %p) {
define void @load_ptr_dereferenceable_or_null_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_dereferenceable_or_null_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !7
+; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null [[META7]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@@ -409,6 +409,31 @@ join:
ret void
}
+define void @test_nosanitize(ptr %p) {
+; CHECK-LABEL: define void @test_nosanitize
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !nosanitize [[META6]]
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK: if:
+; CHECK-NEXT: call void @use.i32(i32 0)
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: ret void
+;
+ %v1 = load i32, ptr %p, !nosanitize !11
+ %cond = icmp eq i32 %v1, 0
+ br i1 %cond, label %if, label %join
+
+if:
+ %v2 = load i32, ptr %p, !nosanitize !11
+ call void @use.i32(i32 %v2)
+ br label %join
+
+join:
+ ret void
+}
+
!0 = !{i32 0, i32 2}
!1 = !{i32 3, i32 5}
!2 = !{i32 2, i32 5}
@@ -430,8 +455,8 @@ join:
; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5}
; CHECK: [[RNG4]] = !{i32 10, i32 1}
; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2}
-; CHECK: [[META6:![0-9]+]] = !{}
-; CHECK: [[META7:![0-9]+]] = !{i64 10}
+; CHECK: [[META6]] = !{}
+; CHECK: [[META7]] = !{i64 10}
; CHECK: [[RNG8]] = !{i64 0, i64 10}
; CHECK: [[RNG9]] = !{i64 0, i64 10, i64 20, i64 30}
; CHECK: [[RNG10]] = !{i64 10, i64 30}
More information about the llvm-commits
mailing list