[llvm] [AMDGPU] Visit all PHIs in each call to optimizeLiveType (PR #147522)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 01:34:54 PDT 2025


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/147522

>From 8142cf37d0285b879f3560fa42f64dc0eca59327 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 8 Jul 2025 15:30:04 +0200
Subject: [PATCH 1/2] [AMDGPU] Visit all PHIs in each call to optimizeLiveType

Make the Visited set a local variable. Otherwise, we can reject a PHI
(one that does not have a zeroinitializer constant) but still mark it as
visited, and the rest of the function then thinks the PHI is OK when it isn't.
This is a bit crude, but it's the only fix that consistently worked in my testing.

Fixes SWDEV-541767
---
 .../AMDGPU/AMDGPULateCodeGenPrepare.cpp       |  3 +-
 .../amdgpu-late-codegenprepare-crash-splat.ll | 94 +++++++++++++++++++
 2 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index df976cf3f7fdb..523c66c72273c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -79,8 +79,6 @@ class LiveRegOptimizer {
 
   /// The scalar type to convert to
   Type *const ConvertToScalar;
-  /// The set of visited Instructions
-  SmallPtrSet<Instruction *, 4> Visited;
   /// Map of Value -> Converted Value
   ValueToValueMap ValMap;
   /// Map of containing conversions from Optimal Type -> Original Type per BB.
@@ -288,6 +286,7 @@ bool LiveRegOptimizer::optimizeLiveType(
   SmallPtrSet<PHINode *, 4> PhiNodes;
   SmallPtrSet<Instruction *, 4> Defs;
   SmallPtrSet<Instruction *, 4> Uses;
+  SmallPtrSet<Instruction *, 4> Visited;
 
   Worklist.push_back(cast<Instruction>(I));
   while (!Worklist.empty()) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
new file mode 100644
index 0000000000000..837b5e0ab5833
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes="amdgpu-late-codegenprepare,verify" %s  | FileCheck %s
+
+; This crashed because the PHI with a splat was rejected, but then we marked the PHI
+; as visited and tried to convert one of its user afterwards.
+
+define amdgpu_kernel void @widget(i1 %arg, <4 x i8> %arg1, i64 %arg2) {
+; CHECK-LABEL: define amdgpu_kernel void @widget(
+; CHECK-SAME: i1 [[ARG:%.*]], <4 x i8> [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[WIDGET_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 40
+; CHECK-NEXT:    [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 8
+; CHECK-NEXT:    [[ARG2_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT:    [[ARG2_LOAD:%.*]] = load i64, ptr addrspace(4) [[ARG2_KERNARG_OFFSET]], align 4
+; CHECK-NEXT:    br label %[[BB_3:.*]]
+; CHECK:       [[BB_3]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ null, %[[BB]] ], [ [[GETELEMENTPTR:%.*]], %[[BB_14:.*]] ]
+; CHECK-NEXT:    [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_14]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_6_PREHEADER:.*]], label %[[BB_5:.*]]
+; CHECK:       [[BB_5]]:
+; CHECK-NEXT:    br label %[[BB_14]]
+; CHECK:       [[BB_6_PREHEADER]]:
+; CHECK-NEXT:    br label %[[BB_6:.*]]
+; CHECK:       [[BB_6]]:
+; CHECK-NEXT:    [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_12:.*]] ], [ [[PHI4]], %[[BB_6_PREHEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_8:.*]], label %[[BB_12]]
+; CHECK:       [[BB_8]]:
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_10:.*]], label %[[BB_9:.*]]
+; CHECK:       [[BB_9]]:
+; CHECK-NEXT:    br label %[[BB_10]]
+; CHECK:       [[BB_10]]:
+; CHECK-NEXT:    [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_9]] ], [ zeroinitializer, %[[BB_8]] ]
+; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x i8> [[PHI11]], i64 0
+; CHECK-NEXT:    store i8 [[EXTRACTELEMENT]], ptr addrspace(1) [[PHI]], align 1
+; CHECK-NEXT:    br label %[[BB_12]]
+; CHECK:       [[BB_12]]:
+; CHECK-NEXT:    [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_10]] ], [ [[PHI7]], %[[BB_6]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_6]], label %[[BB_14]]
+; CHECK:       [[BB_14]]:
+; CHECK-NEXT:    [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_5]] ], [ zeroinitializer, %[[BB_12]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR]] = getelementptr i8, ptr addrspace(1) [[PHI]], i64 [[ARG2_LOAD]]
+; CHECK-NEXT:    br label %[[BB_3]]
+;
+bb:
+  %widget.kernarg.segment = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+  %arg.kernarg.offset.align.down = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 36
+  %0 = load i32, ptr addrspace(4) %arg.kernarg.offset.align.down, align 4
+  %1 = trunc i32 %0 to i1
+  %arg1.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 40
+  %arg1.load = load <4 x i8>, ptr addrspace(4) %arg1.kernarg.offset, align 8
+  %arg2.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 44
+  %arg2.load = load i64, ptr addrspace(4) %arg2.kernarg.offset, align 4
+  br label %bb.3
+
+bb.3:                                              ; preds = %bb.14, %bb
+  %phi = phi ptr addrspace(1) [ null, %bb ], [ %getelementptr, %bb.14 ]
+  %phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.14 ]
+  br i1 %1, label %bb.6.preheader, label %bb.5
+
+bb.5:                                              ; preds = %bb.3
+  br label %bb.14
+
+bb.6.preheader:                                    ; preds = %bb.3
+  br label %bb.6
+
+bb.6:                                              ; preds = %bb.6.preheader, %bb.12
+  %phi7 = phi <4 x i8> [ %phi13, %bb.12 ], [ %phi4, %bb.6.preheader ]
+  br i1 %1, label %bb.8, label %bb.12
+
+bb.8:                                              ; preds = %bb.6
+  br i1 %1, label %bb.10, label %bb.9
+
+bb.9:                                              ; preds = %bb.8
+  br label %bb.10
+
+bb.10:                                             ; preds = %bb.9, %bb.8
+  %phi11 = phi <4 x i8> [ %phi7, %bb.9 ], [ zeroinitializer, %bb.8 ]
+  %extractelement = extractelement <4 x i8> %phi11, i64 0
+  store i8 %extractelement, ptr addrspace(1) %phi, align 1
+  br label %bb.12
+
+bb.12:                                             ; preds = %bb.10, %bb.6
+  %phi13 = phi <4 x i8> [ zeroinitializer, %bb.10 ], [ %phi7, %bb.6 ]
+  br i1 %1, label %bb.6, label %bb.14
+
+bb.14:                                             ; preds = %bb.5, %bb.12
+  %phi15 = phi <4 x i8> [ %arg1.load, %bb.5 ], [ zeroinitializer, %bb.12 ]
+  %getelementptr = getelementptr i8, ptr addrspace(1) %phi, i64 %arg2.load
+  br label %bb.3
+}

>From 142707bb2e28ccdc7df9257f8787995626841700 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 9 Jul 2025 10:34:42 +0200
Subject: [PATCH 2/2] Further reduce test

---
 .../amdgpu-late-codegenprepare-crash-splat.ll | 126 ++++++++----------
 1 file changed, 52 insertions(+), 74 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
index 837b5e0ab5833..b8464c37a5dc2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
@@ -1,94 +1,72 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes="amdgpu-late-codegenprepare,verify" %s  | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-late-codegenprepare %s  | FileCheck %s
 
 ; This crashed because the PHI with a splat was rejected, but then we marked the PHI
 ; as visited and tried to convert one of its user afterwards.
 
-define amdgpu_kernel void @widget(i1 %arg, <4 x i8> %arg1, i64 %arg2) {
+define amdgpu_kernel void @widget(ptr %arg, ptr %arg1, ptr %arg2) {
 ; CHECK-LABEL: define amdgpu_kernel void @widget(
-; CHECK-SAME: i1 [[ARG:%.*]], <4 x i8> [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
+; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) {
 ; CHECK-NEXT:  [[BB:.*]]:
-; CHECK-NEXT:    [[WIDGET_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
-; CHECK-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 36
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
-; CHECK-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 40
-; CHECK-NEXT:    [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 8
-; CHECK-NEXT:    [[ARG2_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 44
-; CHECK-NEXT:    [[ARG2_LOAD:%.*]] = load i64, ptr addrspace(4) [[ARG2_KERNARG_OFFSET]], align 4
-; CHECK-NEXT:    br label %[[BB_3:.*]]
-; CHECK:       [[BB_3]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ null, %[[BB]] ], [ [[GETELEMENTPTR:%.*]], %[[BB_14:.*]] ]
-; CHECK-NEXT:    [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_14]] ]
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_6_PREHEADER:.*]], label %[[BB_5:.*]]
+; CHECK-NEXT:    [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr [[ARG1]], align 4
+; CHECK-NEXT:    [[ARG2_LOAD:%.*]] = load i64, ptr [[ARG2]], align 4
+; CHECK-NEXT:    br label %[[BB_1:.*]]
+; CHECK:       [[BB_1]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ null, %[[BB]] ], [ [[ARG1]], %[[BB_6:.*]] ]
+; CHECK-NEXT:    [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_6]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_2:.*]], label %[[BB_6]]
+; CHECK:       [[BB_2]]:
+; CHECK-NEXT:    [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_5:.*]] ], [ [[PHI4]], %[[BB_1]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_4:.*]], label %[[BB_5]]
+; CHECK:       [[BB_3:.*]]:
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_4]], label %[[BB_EXIT:.*]]
+; CHECK:       [[BB_4]]:
+; CHECK-NEXT:    [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_3]] ], [ zeroinitializer, %[[BB_2]] ]
+; CHECK-NEXT:    store <4 x i8> [[PHI11]], ptr [[PHI]], align 1
+; CHECK-NEXT:    br label %[[BB_5]]
 ; CHECK:       [[BB_5]]:
-; CHECK-NEXT:    br label %[[BB_14]]
-; CHECK:       [[BB_6_PREHEADER]]:
-; CHECK-NEXT:    br label %[[BB_6:.*]]
+; CHECK-NEXT:    [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_4]] ], [ [[PHI7]], %[[BB_2]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_2]], label %[[BB_6]]
 ; CHECK:       [[BB_6]]:
-; CHECK-NEXT:    [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_12:.*]] ], [ [[PHI4]], %[[BB_6_PREHEADER]] ]
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_8:.*]], label %[[BB_12]]
-; CHECK:       [[BB_8]]:
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_10:.*]], label %[[BB_9:.*]]
-; CHECK:       [[BB_9]]:
-; CHECK-NEXT:    br label %[[BB_10]]
-; CHECK:       [[BB_10]]:
-; CHECK-NEXT:    [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_9]] ], [ zeroinitializer, %[[BB_8]] ]
-; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x i8> [[PHI11]], i64 0
-; CHECK-NEXT:    store i8 [[EXTRACTELEMENT]], ptr addrspace(1) [[PHI]], align 1
-; CHECK-NEXT:    br label %[[BB_12]]
-; CHECK:       [[BB_12]]:
-; CHECK-NEXT:    [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_10]] ], [ [[PHI7]], %[[BB_6]] ]
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB_6]], label %[[BB_14]]
-; CHECK:       [[BB_14]]:
-; CHECK-NEXT:    [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_5]] ], [ zeroinitializer, %[[BB_12]] ]
-; CHECK-NEXT:    [[GETELEMENTPTR]] = getelementptr i8, ptr addrspace(1) [[PHI]], i64 [[ARG2_LOAD]]
-; CHECK-NEXT:    br label %[[BB_3]]
+; CHECK-NEXT:    [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_1]] ], [ zeroinitializer, %[[BB_5]] ]
+; CHECK-NEXT:    br label %[[BB_1]]
+; CHECK:       [[BB_EXIT]]:
+; CHECK-NEXT:    ret void
 ;
 bb:
-  %widget.kernarg.segment = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
-  %arg.kernarg.offset.align.down = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 36
-  %0 = load i32, ptr addrspace(4) %arg.kernarg.offset.align.down, align 4
-  %1 = trunc i32 %0 to i1
-  %arg1.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 40
-  %arg1.load = load <4 x i8>, ptr addrspace(4) %arg1.kernarg.offset, align 8
-  %arg2.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 44
-  %arg2.load = load i64, ptr addrspace(4) %arg2.kernarg.offset, align 4
-  br label %bb.3
+  %ld = load i32, ptr %arg, align 4
+  %ld.trunc = trunc i32 %ld to i1
+  %arg1.load = load <4 x i8>, ptr %arg1, align 4
+  %arg2.load = load i64, ptr %arg2, align 4
+  br label %bb.1
 
-bb.3:                                              ; preds = %bb.14, %bb
-  %phi = phi ptr addrspace(1) [ null, %bb ], [ %getelementptr, %bb.14 ]
-  %phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.14 ]
-  br i1 %1, label %bb.6.preheader, label %bb.5
+bb.1:
+  %phi = phi ptr [ null, %bb ], [ %arg1, %bb.6 ]
+  %phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.6 ]
+  br i1 %ld.trunc, label %bb.2, label %bb.6
 
-bb.5:                                              ; preds = %bb.3
-  br label %bb.14
+bb.2:
+  %phi7 = phi <4 x i8> [ %phi13, %bb.5 ], [ %phi4, %bb.1 ]
+  br i1 %ld.trunc, label %bb.4, label %bb.5
 
-bb.6.preheader:                                    ; preds = %bb.3
-  br label %bb.6
+bb.3:
+  br i1 %ld.trunc, label %bb.4, label %bb.exit
 
-bb.6:                                              ; preds = %bb.6.preheader, %bb.12
-  %phi7 = phi <4 x i8> [ %phi13, %bb.12 ], [ %phi4, %bb.6.preheader ]
-  br i1 %1, label %bb.8, label %bb.12
+bb.4:
+  %phi11 = phi <4 x i8> [ %phi7, %bb.3 ], [ zeroinitializer, %bb.2 ]
+  store <4 x i8> %phi11, ptr %phi, align 1
+  br label %bb.5
 
-bb.8:                                              ; preds = %bb.6
-  br i1 %1, label %bb.10, label %bb.9
+bb.5:
+  %phi13 = phi <4 x i8> [ zeroinitializer, %bb.4 ], [ %phi7, %bb.2 ]
+  br i1 %ld.trunc, label %bb.2, label %bb.6
 
-bb.9:                                              ; preds = %bb.8
-  br label %bb.10
+bb.6:
+  %phi15 = phi <4 x i8> [ %arg1.load, %bb.1 ], [ zeroinitializer, %bb.5 ]
+  br label %bb.1
 
-bb.10:                                             ; preds = %bb.9, %bb.8
-  %phi11 = phi <4 x i8> [ %phi7, %bb.9 ], [ zeroinitializer, %bb.8 ]
-  %extractelement = extractelement <4 x i8> %phi11, i64 0
-  store i8 %extractelement, ptr addrspace(1) %phi, align 1
-  br label %bb.12
-
-bb.12:                                             ; preds = %bb.10, %bb.6
-  %phi13 = phi <4 x i8> [ zeroinitializer, %bb.10 ], [ %phi7, %bb.6 ]
-  br i1 %1, label %bb.6, label %bb.14
-
-bb.14:                                             ; preds = %bb.5, %bb.12
-  %phi15 = phi <4 x i8> [ %arg1.load, %bb.5 ], [ zeroinitializer, %bb.12 ]
-  %getelementptr = getelementptr i8, ptr addrspace(1) %phi, i64 %arg2.load
-  br label %bb.3
+bb.exit:
+  ret void
 }



More information about the llvm-commits mailing list