[llvm] [InstSimplify] Fix incorrect poison propagation when folding phi (PR #96631)

Thu Nov 7 03:42:19 PST 2024

https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/96631

>From 6b8688c6a8c82a8ea7505c71af0b2df52cbda5ba Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 25 Jun 2024 14:38:03 +0200
Subject: [PATCH] [InstSimplify] Fix incorrect poison propagation when folding
 phi

We can only replace phi(X, undef) with X, if X is known not to be
poison. Otherwise, the result may be more poisonous on the undef
branch.
---
 llvm/lib/Analysis/InstructionSimplify.cpp     |  9 +++-
 llvm/test/CodeGen/AArch64/convertphitype.ll   |  5 ++-
 .../sve-breakdown-scalable-vectortype.ll      | 42 +++++++++----------
 .../CodeGen/Hexagon/frame-offset-overflow.ll  |  2 +-
 llvm/test/CodeGen/Hexagon/trunc-mpy.ll        |  2 +-
 llvm/test/CodeGen/PowerPC/sms-phi-5.ll        |  2 +-
 llvm/test/Transforms/InstCombine/phi.ll       |  3 +-
 llvm/test/Transforms/InstSimplify/phi.ll      |  3 +-
 llvm/test/Transforms/LoopDeletion/pr53969.ll  |  2 +-
 llvm/test/Transforms/LoopVectorize/pr31190.ll |  4 +-
 .../Transforms/Mem2Reg/UndefValuesMerge.ll    |  3 +-
 llvm/test/Transforms/Mem2Reg/single-store.ll  |  3 +-
 12 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2cb2612bf611e3..daa468ac095c36 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5315,7 +5315,14 @@ static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
     // If we have a PHI node like phi(X, undef, X), where X is defined by some
     // instruction, we cannot return X as the result of the PHI node unless it
     // dominates the PHI block.
-    return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+    if (!valueDominatesPHI(CommonValue, PN, Q.DT))
+      return nullptr;
+
+    // Make sure we do not replace an undef value with poison.
+    if (HasUndefInput &&
+        !isGuaranteedNotToBePoison(CommonValue, Q.AC, Q.CxtI, Q.DT))
+      return nullptr;
+    return CommonValue;
   }
 
   return CommonValue;
diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll
index b723b470266a78..e690653237a2dc 100644
--- a/llvm/test/CodeGen/AArch64/convertphitype.ll
+++ b/llvm/test/CodeGen/AArch64/convertphitype.ll
@@ -341,6 +341,7 @@ define float @convphi_loopdelayed(ptr %s, ptr %d, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, ptr [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -349,8 +350,8 @@ define float @convphi_loopdelayed(ptr %s, ptr %d, i64 %n) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[LS]] to float
-; CHECK-NEXT:    ret float [[B]]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LS_BC]], [[LOOP]] ]
+; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
   %cmp15 = icmp sgt i64 %n, 0
diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
index 5b69b68552a4d7..3645af3ccaee0f 100644
--- a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
+++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll
@@ -33,7 +33,7 @@ define <vscale x 32 x i8> @wide_32i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x i8> undef
+  ret <vscale x 32 x i8> poison
 L2:
   ret <vscale x 32 x i8> %illegal
 }
@@ -61,7 +61,7 @@ define <vscale x 16 x i16> @wide_16i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x i16> undef
+  ret <vscale x 16 x i16> poison
 L2:
   ret <vscale x 16 x i16> %illegal
 }
@@ -89,7 +89,7 @@ define <vscale x 8 x i32> @wide_8i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x i32> undef
+  ret <vscale x 8 x i32> poison
 L2:
   ret <vscale x 8 x i32> %illegal
 }
@@ -117,7 +117,7 @@ define <vscale x 4 x i64> @wide_4i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 4 x i64> undef
+  ret <vscale x 4 x i64> poison
 L2:
   ret <vscale x 4 x i64> %illegal
 }
@@ -145,7 +145,7 @@ define <vscale x 16 x half> @wide_16f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x half> undef
+  ret <vscale x 16 x half> poison
 L2:
   ret <vscale x 16 x half> %illegal
 }
@@ -173,7 +173,7 @@ define <vscale x 8 x float> @wide_8f32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x float> undef
+  ret <vscale x 8 x float> poison
 L2:
   ret <vscale x 8 x float> %illegal
 }
@@ -201,7 +201,7 @@ define <vscale x 4 x double> @wide_4f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 4 x double> undef
+  ret <vscale x 4 x double> poison
 L2:
   ret <vscale x 4 x double> %illegal
 }
@@ -237,7 +237,7 @@ define <vscale x 48 x i8> @wide_48i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 48 x i8> undef
+  ret <vscale x 48 x i8> poison
 L2:
   ret <vscale x 48 x i8> %illegal
 }
@@ -269,7 +269,7 @@ define <vscale x 24 x i16> @wide_24i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 24 x i16> undef
+  ret <vscale x 24 x i16> poison
 L2:
   ret <vscale x 24 x i16> %illegal
 }
@@ -301,7 +301,7 @@ define <vscale x 12 x i32> @wide_12i32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 12 x i32> undef
+  ret <vscale x 12 x i32> poison
 L2:
   ret <vscale x 12 x i32> %illegal
 }
@@ -333,7 +333,7 @@ define <vscale x 6 x i64> @wide_6i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 6 x i64> undef
+  ret <vscale x 6 x i64> poison
 L2:
   ret <vscale x 6 x i64> %illegal
 }
@@ -365,7 +365,7 @@ define <vscale x 24 x half> @wide_24f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 24 x half> undef
+  ret <vscale x 24 x half> poison
 L2:
   ret <vscale x 24 x half> %illegal
 }
@@ -397,7 +397,7 @@ define <vscale x 12 x float> @wide_12f32(i1 %b, <vscale x 16 x i8> %legal, <vsca
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 12 x float> undef
+  ret <vscale x 12 x float> poison
 L2:
   ret <vscale x 12 x float> %illegal
 }
@@ -429,7 +429,7 @@ define <vscale x 6 x double> @wide_6f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 6 x double> undef
+  ret <vscale x 6 x double> poison
 L2:
   ret <vscale x 6 x double> %illegal
 }
@@ -469,7 +469,7 @@ define <vscale x 64 x i8> @wide_64i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 64 x i8> undef
+  ret <vscale x 64 x i8> poison
 L2:
   ret <vscale x 64 x i8> %illegal
 }
@@ -505,7 +505,7 @@ define <vscale x 32 x i16> @wide_32i16(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x i16> undef
+  ret <vscale x 32 x i16> poison
 L2:
   ret <vscale x 32 x i16> %illegal
 }
@@ -541,7 +541,7 @@ define <vscale x 16 x i32> @wide_16i32(i1 %b, <vscale x 16 x i8> %legal, <vscale
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x i32> undef
+  ret <vscale x 16 x i32> poison
 L2:
   ret <vscale x 16 x i32> %illegal
 }
@@ -577,7 +577,7 @@ define <vscale x 8 x i64> @wide_8i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x i64> undef
+  ret <vscale x 8 x i64> poison
 L2:
   ret <vscale x 8 x i64> %illegal
 }
@@ -613,7 +613,7 @@ define <vscale x 32 x half> @wide_32f16(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 32 x half> undef
+  ret <vscale x 32 x half> poison
 L2:
   ret <vscale x 32 x half> %illegal
 }
@@ -649,7 +649,7 @@ define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vsca
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 16 x float> undef
+  ret <vscale x 16 x float> poison
 L2:
   ret <vscale x 16 x float> %illegal
 }
@@ -685,7 +685,7 @@ define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscal
   br i1 %b, label %L1, label %L2
 L1:
   call aarch64_sve_vector_pcs void @bar()
-  ret <vscale x 8 x double> undef
+  ret <vscale x 8 x double> poison
 L2:
   ret <vscale x 8 x double> %illegal
 }
diff --git a/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll b/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
index 861a54f893f0f2..8b103054975030 100644
--- a/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
+++ b/llvm/test/CodeGen/Hexagon/frame-offset-overflow.ll
@@ -34,7 +34,7 @@ for.body:                                         ; preds = %for.body, %entry
   %optr.0102 = phi ptr [ %incdec.ptr24.3, %for.body ], [ %p3, %entry ]
   %iptr4.0101 = phi ptr [ %incdec.ptr23.3, %for.body ], [ %incdec.ptr18, %entry ]
   %iptr3.0100 = phi ptr [ %incdec.ptr22.3, %for.body ], [ %incdec.ptr17, %entry ]
-  %iptr2.099 = phi ptr [ undef, %for.body ], [ %incdec.ptr16, %entry ]
+  %iptr2.099 = phi ptr [ poison, %for.body ], [ %incdec.ptr16, %entry ]
   %iptr1.098 = phi ptr [ %incdec.ptr20.3, %for.body ], [ %incdec.ptr15, %entry ]
   %iptr0.097 = phi ptr [ %incdec.ptr19.3, %for.body ], [ %incdec.ptr, %entry ]
   %dVsumv1.096 = phi <32 x i32> [ %60, %for.body ], [ undef, %entry ]
diff --git a/llvm/test/CodeGen/Hexagon/trunc-mpy.ll b/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
index ee2a85008019eb..ea132495cde859 100644
--- a/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
+++ b/llvm/test/CodeGen/Hexagon/trunc-mpy.ll
@@ -69,7 +69,7 @@ b1:                                               ; preds = %b0
 
 b2:                                               ; preds = %b2, %b1
   %v2 = phi ptr [ %v0, %b1 ], [ %v14, %b2 ]
-  %v3 = phi ptr [ %v1, %b1 ], [ undef, %b2 ]
+  %v3 = phi ptr [ %v1, %b1 ], [ poison, %b2 ]
   %v4 = phi ptr [ null, %b1 ], [ %v6, %b2 ]
   %v5 = load i32, ptr %v4, align 4
   %v6 = getelementptr inbounds i32, ptr %v4, i32 2
diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
index 5a4aa8fdfbf395..5dcfdf5cc81248 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll
@@ -35,7 +35,7 @@ define void @phi5() unnamed_addr {
   %4 = phi i16 [ %18, %3 ], [ undef, %1 ]
   %5 = phi i16 [ %13, %3 ], [ undef, %1 ]
   %6 = phi i16 [ %11, %3 ], [ undef, %1 ]
-  %7 = phi i16 [ undef, %3 ], [ %2, %1 ]
+  %7 = phi i16 [ poison, %3 ], [ %2, %1 ]
   %8 = phi i32 [ %19, %3 ], [ undef, %1 ]
   %9 = lshr i16 %6, 1
   %10 = shl i16 %7, 15
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index b33ad9a7d339f2..4e90a8f0ba4313 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -93,9 +93,10 @@ define i32 @test5_undef(i32 %A, i1 %cond) {
 ; CHECK-NEXT:  BB0:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       Loop:
+; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[A:%.*]], [[BB0:%.*]] ], [ undef, [[LOOP]] ]
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       Exit:
-; CHECK-NEXT:    ret i32 [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[B]]
 ;
 BB0:
   br label %Loop
diff --git a/llvm/test/Transforms/InstSimplify/phi.ll b/llvm/test/Transforms/InstSimplify/phi.ll
index 5cc3fecb129f4d..496a3c896944b9 100644
--- a/llvm/test/Transforms/InstSimplify/phi.ll
+++ b/llvm/test/Transforms/InstSimplify/phi.ll
@@ -81,7 +81,8 @@ define i32 @undef(i1 %cond, i32 %v) {
 ; CHECK:       B:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       EXIT:
-; CHECK-NEXT:    ret i32 [[V:%.*]]
+; CHECK-NEXT:    [[W:%.*]] = phi i32 [ [[V:%.*]], [[A]] ], [ undef, [[B]] ]
+; CHECK-NEXT:    ret i32 [[W]]
 ;
   br i1 %cond, label %A, label %B
 A:
diff --git a/llvm/test/Transforms/LoopDeletion/pr53969.ll b/llvm/test/Transforms/LoopDeletion/pr53969.ll
index f9b9dc3373b5e7..3b8904e4457f8f 100644
--- a/llvm/test/Transforms/LoopDeletion/pr53969.ll
+++ b/llvm/test/Transforms/LoopDeletion/pr53969.ll
@@ -44,7 +44,7 @@ define void @test() {
 ; CHECK-NEXT:    br i1 false, label [[BB11]], label [[BB12:%.*]]
 ; CHECK:       bb42:
 ; CHECK-NEXT:    [[TMP24_LCSSA:%.*]] = phi i32 [ undef, [[BB22]] ]
-; CHECK-NEXT:    [[TMP18_LCSSA4:%.*]] = phi i64 [ 0, [[BB22]] ]
+; CHECK-NEXT:    [[TMP18_LCSSA4:%.*]] = phi i64 [ undef, [[BB22]] ]
 ; CHECK-NEXT:    store atomic i64 [[TMP18_LCSSA4]], ptr addrspace(1) undef unordered, align 8
 ; CHECK-NEXT:    call void @use(i32 [[TMP24_LCSSA]])
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVectorize/pr31190.ll b/llvm/test/Transforms/LoopVectorize/pr31190.ll
index b6cacf1a7fe879..c4f1f9c5d8f0ce 100644
--- a/llvm/test/Transforms/LoopVectorize/pr31190.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr31190.ll
@@ -5,7 +5,7 @@
 ; is a SCEV AddRec with respect to an outer loop.
 
 ; In this case, the problematic PHI is:
-; %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
+; %0 = phi i32 [ poison, %for.cond1.preheader ], [ %inc54, %for.body3 ]
 ; Since %inc54 is the IV of the outer loop, and %0 equivalent to it,
 ; we get the situation described above.
 
@@ -47,7 +47,7 @@ for.cond1.preheader:                              ; preds = %for.cond1.for.inc4_
 
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ]
-  %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
+  %0 = phi i32 [ poison, %for.cond1.preheader ], [ %inc54, %for.body3 ]
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds [1 x i32], ptr @b, i64 0, i64 %idxprom
   store i32 4, ptr %arrayidx, align 4
diff --git a/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll b/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
index 4be4680b4e019a..09a914fd9fa86f 100644
--- a/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
+++ b/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
@@ -8,7 +8,8 @@ define i32 @testfunc(i1 %C, i32 %i, i8 %j) {
 ; CHECK:       [[T]]:
 ; CHECK-NEXT:    br label %[[CONT]]
 ; CHECK:       [[CONT]]:
-; CHECK-NEXT:    ret i32 [[I]]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[I]], %[[T]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT:    ret i32 [[I_0]]
 ;
   %I = alloca i32
   br i1 %C, label %T, label %Cont
diff --git a/llvm/test/Transforms/Mem2Reg/single-store.ll b/llvm/test/Transforms/Mem2Reg/single-store.ll
index f864227c49145c..22c5e34ec9bb4c 100644
--- a/llvm/test/Transforms/Mem2Reg/single-store.ll
+++ b/llvm/test/Transforms/Mem2Reg/single-store.ll
@@ -30,7 +30,8 @@ define i8 @single_store_maybe_poison(i1 %cond, i8 %x) {
 ; CHECK:       [[IF]]:
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret i8 [[X]]
+; CHECK-NEXT:    [[A_0:%.*]] = phi i8 [ [[X]], %[[IF]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT:    ret i8 [[A_0]]
 ;
   %a = alloca i8, align 1
   br i1 %cond, label %if, label %exit