[llvm] InstCombine: Fix a crash in `PointerReplacer` when constructing a new PHI (PR #130256)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 11:47:58 PST 2025
https://github.com/changpeng updated https://github.com/llvm/llvm-project/pull/130256
>From 28319491cb44c37699dc97d1696c80c2836229ea Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Thu, 6 Mar 2025 23:08:42 -0800
Subject: [PATCH 1/4] InstCombine: Fix a crash in `PointerReplacer` when
constructing a PHI
When constructing a PHI node in `PointerReplacer::replace`, the incoming operands
are expected to have already been replaced and to be in the replacement map. However, when
one of the incoming operands is a load, the search of the map is unsuccessful, and a
nullptr is returned from `getReplacement`. The reason is that, when a load is replaced,
all the uses of the load have actually been replaced by the new load. It is useless to
insert the original load into the map. Instead, we should place the new load into the
map to meet the expectation of the later map search.
Fixes: SWDEV-516420.
---
.../InstCombineLoadStoreAlloca.cpp | 5 +-
.../AMDGPU/phi-with-incoming-from-load.ll | 48 +++++++++++++++++++
2 files changed, 52 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c1f5e286ab3ed..c29cba6f675c5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -360,7 +360,10 @@ void PointerReplacer::replace(Instruction *I) {
IC.InsertNewInstWith(NewI, LT->getIterator());
IC.replaceInstUsesWith(*LT, NewI);
- WorkMap[LT] = NewI;
+ // LT has actually been replaced by NewI. It is useless to insert LT into
+ // the map. Instead, we insert NewI into the map to indicate this is the
+ // replacement (new value).
+ WorkMap[NewI] = NewI;
} else if (auto *PHI = dyn_cast<PHINode>(I)) {
Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
new file mode 100644
index 0000000000000..eb15db6f3e079
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O1 -S -o - %s | FileCheck %s
+; REQUIRES: amdgpu-registered-target
+
+target triple = "amdgcn-amd-amdhsa"
+
+%"doube_double" = type { double, double}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+; Function Attrs: convergent mustprogress norecurse nounwind sanitize_address uwtable
+define amdgpu_kernel void @_test(ptr addrspace(4) noundef byref(%"doube_double") align 8 %0) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @_test(
+; CHECK-SAME: ptr addrspace(4) noundef readonly byref([[DOUBE_DOUBLE:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ALPHA_UNION:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(5) null, align 2147483648
+; CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+; CHECK-NEXT: br i1 [[LOADEDV]], label %[[COND_END:.*]], label %[[COND_FALSE:.*]]
+; CHECK: [[COND_FALSE]]:
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8
+; CHECK-NEXT: br label %[[COND_END]]
+; CHECK: [[COND_END]]:
+; CHECK-NEXT: [[COND1:%.*]] = phi ptr [ [[TMP2]], %[[COND_FALSE]] ], [ [[ALPHA_UNION]], %[[ENTRY]] ]
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) poison, ptr noundef nonnull align 8 dereferenceable(16) [[COND1]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %coerce = alloca %"doube_double", align 8, addrspace(5)
+ %alpha_union = addrspacecast ptr addrspace(5) %coerce to ptr
+ %is_host_mode.addr.ascast = addrspacecast ptr addrspace(5) null to ptr
+ call void @llvm.memcpy.p0.p4.i64(ptr align 8 %alpha_union, ptr addrspace(4) align 8 %0, i64 16, i1 false)
+ %1 = load i8, ptr %is_host_mode.addr.ascast, align 1
+ %loadedv = trunc i8 %1 to i1
+ br i1 %loadedv, label %cond.end, label %cond.false
+
+cond.false: ; preds = %entry
+ %2 = load ptr, ptr %alpha_union, align 8
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %entry
+ %cond = phi ptr [ %2, %cond.false ], [ %alpha_union, %entry ]
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 %cond, i64 16, i1 false)
+ ret void
+}
+
+attributes #0 = { convergent mustprogress norecurse nounwind sanitize_address uwtable "amdgpu-flat-work-group-size"="1,128" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xnack" "uniform-work-group-size"="true" }
>From 7ef6ac071a103c123e007e2aff78761d31dcdc58 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Fri, 7 Mar 2025 00:10:05 -0800
Subject: [PATCH 2/4] InstCombine: Fix a crash in PointerReplacer when
constructing a new PHI
Update the LIT tests.
---
.../AMDGPU/phi-with-incoming-from-load.ll | 45 +++++++++----------
1 file changed, 20 insertions(+), 25 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
index eb15db6f3e079..b6853426e4ede 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
@@ -1,48 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -O1 -S -o - %s | FileCheck %s
+; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
; REQUIRES: amdgpu-registered-target
target triple = "amdgcn-amd-amdhsa"
-%"doube_double" = type { double, double}
+%double_double = type { double, double }
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
-; Function Attrs: convergent mustprogress norecurse nounwind sanitize_address uwtable
-define amdgpu_kernel void @_test(ptr addrspace(4) noundef byref(%"doube_double") align 8 %0) #0 {
-; CHECK-LABEL: define amdgpu_kernel void @_test(
-; CHECK-SAME: ptr addrspace(4) noundef readonly byref([[DOUBE_DOUBLE:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+define void @_test(ptr addrspace(4) byref(%double_double) align 8 %in) {
+; CHECK-LABEL: define void @_test(
+; CHECK-SAME: ptr addrspace(4) byref([[DOUBLE_DOUBLE:%.*]]) align 8 [[IN:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[ALPHA_UNION:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(5) null, align 2147483648
-; CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+; CHECK-NEXT: [[ALPHA_UNION:%.*]] = addrspacecast ptr addrspace(4) [[IN]] to ptr
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(5) null, align 1
+; CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[LOAD]] to i1
; CHECK-NEXT: br i1 [[LOADEDV]], label %[[COND_END:.*]], label %[[COND_FALSE:.*]]
; CHECK: [[COND_FALSE]]:
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(4) [[IN]], align 8
; CHECK-NEXT: br label %[[COND_END]]
; CHECK: [[COND_END]]:
-; CHECK-NEXT: [[COND1:%.*]] = phi ptr [ [[TMP2]], %[[COND_FALSE]] ], [ [[ALPHA_UNION]], %[[ENTRY]] ]
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) poison, ptr noundef nonnull align 8 dereferenceable(16) [[COND1]], i64 16, i1 false)
+; CHECK-NEXT: [[COND1:%.*]] = phi ptr [ [[TMP0]], %[[COND_FALSE]] ], [ [[ALPHA_UNION]], %[[ENTRY]] ]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) poison, ptr noundef nonnull align 1 dereferenceable(16) [[COND1]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
entry:
- %coerce = alloca %"doube_double", align 8, addrspace(5)
+ %coerce = alloca %double_double, align 8, addrspace(5)
%alpha_union = addrspacecast ptr addrspace(5) %coerce to ptr
- %is_host_mode.addr.ascast = addrspacecast ptr addrspace(5) null to ptr
- call void @llvm.memcpy.p0.p4.i64(ptr align 8 %alpha_union, ptr addrspace(4) align 8 %0, i64 16, i1 false)
- %1 = load i8, ptr %is_host_mode.addr.ascast, align 1
- %loadedv = trunc i8 %1 to i1
+ call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %in, i64 16, i1 false)
+ %load = load i8, ptr addrspace(5) null, align 1
+ %loadedv = trunc i8 %load to i1
br i1 %loadedv, label %cond.end, label %cond.false
-cond.false: ; preds = %entry
- %2 = load ptr, ptr %alpha_union, align 8
+cond.false:
+ %2 = load ptr, ptr addrspace(5) %coerce, align 8
br label %cond.end
-cond.end: ; preds = %cond.false, %entry
+cond.end:
%cond = phi ptr [ %2, %cond.false ], [ %alpha_union, %entry ]
- call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 %cond, i64 16, i1 false)
+ call void @llvm.memcpy.p0.p0.i64(ptr poison, ptr %cond, i64 16, i1 false)
ret void
}
-
-attributes #0 = { convergent mustprogress norecurse nounwind sanitize_address uwtable "amdgpu-flat-work-group-size"="1,128" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xnack" "uniform-work-group-size"="true" }
>From 5d67890285cb0c6f4f708f0fb968a5b79891dbc6 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Fri, 7 Mar 2025 11:24:03 -0800
Subject: [PATCH 3/4] InstCombine: Fix a crash in PointerReplacer when
constructing a new PHI
Update LIT test based on the comment.
---
.../Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
index b6853426e4ede..7c81ee406a9de 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
-; REQUIRES: amdgpu-registered-target
target triple = "amdgcn-amd-amdhsa"
>From 1c5e5ba526e76df9163c9b309301d0e6cf8e0db7 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Fri, 7 Mar 2025 11:46:19 -0800
Subject: [PATCH 4/4] InstCombine: Fix a crash in PointerReplacer when
constructing a new PHI
Use all named variables in the LIT test
---
.../InstCombine/AMDGPU/phi-with-incoming-from-load.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
index 7c81ee406a9de..14fb45e43af86 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/phi-with-incoming-from-load.ll
@@ -27,16 +27,16 @@ entry:
%coerce = alloca %double_double, align 8, addrspace(5)
%alpha_union = addrspacecast ptr addrspace(5) %coerce to ptr
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %in, i64 16, i1 false)
- %load = load i8, ptr addrspace(5) null, align 1
- %loadedv = trunc i8 %load to i1
+ %load1 = load i8, ptr addrspace(5) null, align 1
+ %loadedv = trunc i8 %load1 to i1
br i1 %loadedv, label %cond.end, label %cond.false
cond.false:
- %2 = load ptr, ptr addrspace(5) %coerce, align 8
+ %load2 = load ptr, ptr addrspace(5) %coerce, align 8
br label %cond.end
cond.end:
- %cond = phi ptr [ %2, %cond.false ], [ %alpha_union, %entry ]
+ %cond = phi ptr [ %load2, %cond.false ], [ %alpha_union, %entry ]
call void @llvm.memcpy.p0.p0.i64(ptr poison, ptr %cond, i64 16, i1 false)
ret void
}
More information about the llvm-commits
mailing list