[llvm] [SelectionDAG] Folding ZERO_EXTEND/SIGN_EXTEND of poison to a poison value in getNode (PR #122741)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 14 07:53:31 PDT 2025


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/122741

>From 160b57eb7573af1cb3da381864cc4f48425f0cb1 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 8 Apr 2025 20:48:07 +0000
Subject: [PATCH 1/4] lower zero(sign) extend poison to poison

---
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h |  6 +++
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  8 ++++
 .../CodeGen/AArch64/arm64-bitfield-extract.ll |  2 +-
 .../CodeGen/AArch64/optimize-cond-branch.ll   |  2 +-
 .../CodeGen/AArch64/sve-extract-element.ll    |  1 -
 .../atomic_optimizations_global_pointer.ll    | 14 +++----
 llvm/test/CodeGen/AMDGPU/ctpop16.ll           | 41 +++++++++----------
 .../dead-machine-elim-after-dead-lane.ll      |  9 ++--
 .../CodeGen/AMDGPU/mdt-preserving-crash.ll    | 12 ++----
 llvm/test/CodeGen/PowerPC/undef-args.ll       | 14 +++----
 llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll |  8 ----
 11 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index b62cf08693f63..60d9465ccad93 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -213,6 +213,7 @@ class SDValue {
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
   inline bool isUndef() const;
+  inline bool isPoison() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -697,6 +698,9 @@ END_TWO_BYTE_PACK()
     return NodeType == ISD::UNDEF || NodeType == ISD::POISON;
   }
 
+  /// Returns true if the node type is POISON.
+  bool isPoison() const { return NodeType == ISD::POISON; }
+
   /// Test if this node is a memory intrinsic (with valid pointer information).
   bool isMemIntrinsic() const { return SDNodeBits.IsMemIntrinsic; }
 
@@ -1270,6 +1274,8 @@ inline bool SDValue::isUndef() const {
   return Node->isUndef();
 }
 
+inline bool SDValue::isPoison() const { return Node->isPoison(); }
+
 inline bool SDValue::use_empty() const {
   return !Node->hasAnyUseOfValue(ResNo);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d6dcb3f15ae7c..3fcd9443418df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6241,6 +6241,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
         Flags.setNonNeg(N1->getFlags().hasNonNeg());
       return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
     }
+
+    if (N1.isPoison())
+      return getPOISON(VT);
+
     if (N1.isUndef())
       // sext(undef) = 0, because the top bits will all be the same.
       return getConstant(0, DL, VT);
@@ -6261,6 +6265,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       Flags.setNonNeg(N1->getFlags().hasNonNeg());
       return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
     }
+
+    if (N1.isPoison())
+      return getPOISON(VT);
+
     if (N1.isUndef())
       // zext(undef) = 0, because the top bits will be zero.
       return getConstant(0, DL, VT);
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 62ff4bbcc9c27..4b1fff642e5f5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -1019,7 +1019,7 @@ define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
 define void @sameOperandBFI(i64 %src, i64 %src2, ptr %ptr) {
 ; LLC-LABEL: sameOperandBFI:
 ; LLC:       // %bb.0: // %entry
-; LLC-NEXT:    cbnz wzr, .LBB30_2
+; LLC-NEXT:    cbnz w8, .LBB30_2
 ; LLC-NEXT:  // %bb.1: // %if.else
 ; LLC-NEXT:    lsr x8, x0, #47
 ; LLC-NEXT:    and w9, w1, #0x3
diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
index fdf972990e745..8a56360935717 100644
--- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -16,7 +16,7 @@ define void @func() uwtable {
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    cbnz w8, .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %b1
-; CHECK-NEXT:    cbz wzr, .LBB0_4
+; CHECK-NEXT:    cbz w8, .LBB0_4
 ; CHECK-NEXT:  // %bb.2: // %b3
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    and w0, w8, #0x100
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 9ebeb098c60c0..c340df1385124 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -523,7 +523,6 @@ define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
 define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
 ; CHECK-LABEL: test_undef_lane_4xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x i32> %a, i32 poison
   ret i32 %b
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 62083b3e67ab6..0701ed85a0609 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -8387,10 +8387,10 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
 ; GFX7LESS:       ; %bb.0:
 ; GFX7LESS-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; GFX7LESS-NEXT:    s_load_dword s6, s[4:5], 0xd
-; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v1, exec_lo, 0
-; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v1, exec_hi, v1
-; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT:                       ; implicit-def: $vgpr0
 ; GFX7LESS-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX7LESS-NEXT:    s_cbranch_execz .LBB15_2
 ; GFX7LESS-NEXT:  ; %bb.1:
@@ -8735,10 +8735,10 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX7LESS-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; GFX7LESS-NEXT:    s_load_dword s10, s[4:5], 0xd
-; GFX7LESS-NEXT:    v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v1, s6, 0
-; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v4, s7, v1
+; GFX7LESS-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, s6, 0
+; GFX7LESS-NEXT:    v_mbcnt_hi_u32_b32_e32 v4, s7, v0
 ; GFX7LESS-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7LESS-NEXT:                               ; implicit-def: $vgpr0
 ; GFX7LESS-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX7LESS-NEXT:    s_cbranch_execz .LBB16_4
 ; GFX7LESS-NEXT:  ; %bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 6bf126af5ade2..fb418afb8b039 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ; SI-NEXT:  .LBB14_4:
-; SI-NEXT:    v_mov_b32_e32 v0, 0
+; SI-NEXT:                    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_branch .LBB14_2
 ;
 ; VI-LABEL: ctpop_i16_in_br:
@@ -1329,48 +1329,47 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; EG:       ; %bb.0: ; %entry
 ; EG-NEXT:    ALU 0, @20, KC0[], KC1[]
 ; EG-NEXT:    TEX 0 @14
-; EG-NEXT:    ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
+; EG-NEXT:    ALU_PUSH_BEFORE 3, @21, KC0[], KC1[]
 ; EG-NEXT:    JUMP @7 POP:1
-; EG-NEXT:    ALU 0, @26, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @25, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @16
-; EG-NEXT:    ALU_POP_AFTER 1, @27, KC0[], KC1[]
-; EG-NEXT:    ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU_POP_AFTER 1, @26, KC0[], KC1[]
+; EG-NEXT:    ALU_PUSH_BEFORE 2, @28, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    JUMP @11 POP:1
 ; EG-NEXT:    TEX 0 @18
-; EG-NEXT:    ALU_POP_AFTER 0, @32, KC0[], KC1[]
-; EG-NEXT:    ALU 11, @33, KC0[], KC1[]
+; EG-NEXT:    ALU_POP_AFTER 0, @31, KC0[], KC1[]
+; EG-NEXT:    ALU 11, @32, KC0[], KC1[]
 ; EG-NEXT:    MEM_RAT MSKOR T1.XW, T0.X
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 14:
-; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 46, #3
+; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 46, #3
 ; EG-NEXT:    Fetch clause starting at 16:
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
 ; EG-NEXT:    Fetch clause starting at 18:
-; EG-NEXT:     VTX_READ_16 T0.X, T1.X, 44, #3
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 44, #3
 ; EG-NEXT:    ALU clause starting at 20:
-; EG-NEXT:     MOV * T1.X, 0.0,
+; EG-NEXT:     MOV * T0.X, 0.0,
 ; EG-NEXT:    ALU clause starting at 21:
-; EG-NEXT:     MOV T0.X, literal.x,
-; EG-NEXT:     MOV T1.W, literal.y,
-; EG-NEXT:     SETNE_INT * T0.W, T2.X, 0.0,
-; EG-NEXT:    0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT:     MOV T1.W, literal.x,
+; EG-NEXT:     SETNE_INT * T0.W, T1.X, 0.0,
+; EG-NEXT:    1(1.401298e-45), 0(0.000000e+00)
 ; EG-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT:    ALU clause starting at 25:
+; EG-NEXT:     MOV * T1.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 26:
-; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 27:
 ; EG-NEXT:     MOV * T1.W, literal.x,
 ; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
-; EG-NEXT:    ALU clause starting at 29:
+; EG-NEXT:    ALU clause starting at 28:
 ; EG-NEXT:     MOV T0.W, KC0[2].Y,
 ; EG-NEXT:     SETE_INT * T1.W, T1.W, 0.0,
 ; EG-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT:    ALU clause starting at 31:
+; EG-NEXT:     BCNT_INT * T1.X, T0.X,
 ; EG-NEXT:    ALU clause starting at 32:
-; EG-NEXT:     BCNT_INT * T0.X, T0.X,
-; EG-NEXT:    ALU clause starting at 33:
 ; EG-NEXT:     LSHL * T1.W, T0.W, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT:     AND_INT * T2.W, T1.X, literal.y,
 ; EG-NEXT:    24(3.363116e-44), 65535(9.183409e-41)
 ; EG-NEXT:     LSHL T1.X, PS, PV.W,
 ; EG-NEXT:     LSHL * T1.W, literal.x, PV.W,
diff --git a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
index d616fecfdc1ff..f68b035334fd5 100644
--- a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
@@ -6,15 +6,16 @@
 define amdgpu_kernel void @foo() {
 ; CHECK-LABEL: foo:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_2
-; CHECK-NEXT:  ; %bb.1: ; %LeafBlock1
-; CHECK-NEXT:  .LBB0_2: ; %foo.exit
+; CHECK-NEXT:   ; %bb.1:                                ; %LeafBlock1
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 10
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_3
+; CHECK-NEXT:  ; %bb.2:
 ; CHECK-NEXT:    s_mov_b32 s3, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s2, -1
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CHECK-NEXT:    s_endpgm
-; CHECK-NEXT:  ; %bb.3: ; %sw.bb10
+; CHECK-NEXT: .LBB0_3:
 entry:
   switch i8 poison, label %foo.exit [
     i8 4, label %sw.bb4
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
index 3bb840eb51690..4b8ef2c9613a5 100644
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -29,17 +29,13 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
 ; CHECK-NEXT:    s_cmp_eq_u32 s4, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
 ; CHECK-NEXT:  ; %bb.3: ; %if.end60
-; CHECK-NEXT:    s_mov_b64 vcc, exec
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_11
 ; CHECK-NEXT:  ; %bb.4: ; %if.end5.i
-; CHECK-NEXT:    s_mov_b64 vcc, vcc
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_11
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_11
 ; CHECK-NEXT:  ; %bb.5: ; %if.end5.i314
-; CHECK-NEXT:    s_mov_b64 vcc, exec
-; CHECK-NEXT:    s_cbranch_execz .LBB0_11
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_11
 ; CHECK-NEXT:  ; %bb.6: ; %if.end5.i338
-; CHECK-NEXT:    s_mov_b64 vcc, vcc
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_11
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_11
 ; CHECK-NEXT:  ; %bb.7: ; %if.end5.i362
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_getpc_b64 s[4:5]
@@ -50,7 +46,7 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
 ; CHECK-NEXT:    buffer_store_byte v0, v0, s[0:3], 0 offen
 ; CHECK-NEXT:    s_waitcnt vmcnt(1)
 ; CHECK-NEXT:    buffer_store_byte v1, off, s[0:3], 0 offset:257
-; CHECK-NEXT:    s_cbranch_execz .LBB0_11
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_11
 ; CHECK-NEXT:  ; %bb.8: ; %if.end5.i400
 ; CHECK-NEXT:    flat_load_ubyte v0, v[0:1]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 8ae43483f0d11..277aba40fb271 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,5 +1,4 @@
-;; Tests that extending poison results in undef.
-;; Also tests that there are redundant instructions loading 0 into argument registers for unused arguments.
+;; Tests that load 0 into argument registers for unused arguments are eliminated. 
 
 ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -debug-only=isel \
 ; RUN:   %s -o - 2>&1 | FileCheck --check-prefix=CHECKISEL32 %s
@@ -60,7 +59,7 @@ entry:
 ; CHECKISEL64-NEXT:   t7: i64 = Register $x1
 ; CHECKISEL64-NEXT:       t0: ch,glue = EntryToken
 ; CHECKISEL64-NEXT:     t6: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT:   t11: ch,glue = CopyToReg t6, Register:i64 $x3, Constant:i64<0> 
+; CHECKISEL64-NEXT:   t11: ch,glue = CopyToReg t6, Register:i64 $x3, poison:i64
 ; CHECKISEL64-NEXT:   t13: ch,glue = CopyToReg t11, Register:i64 $x4, Constant:i64<255>, t11:1
 ; CHECKISEL64-NEXT:   t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t13:1
 ; CHECKISEL64-NEXT:     t18: ch,glue = callseq_end t17, TargetConstant:i64<112>, TargetConstant:i64<0>, t17:1
@@ -70,7 +69,6 @@ entry:
 ; CHECKASM64-NEXT: # %bb.0:                                # %entry
 ; CHECKASM64-NEXT:         mflr 0
 ; CHECKASM64-NEXT:         stdu 1, -112(1)
-; CHECKASM64-NEXT:         li 3, 0
 ; CHECKASM64-NEXT:         li 4, 255
 ; CHECKASM64-NEXT:         std 0, 128(1)
 ; CHECKASM64-NEXT:         bl .bar32
@@ -102,7 +100,7 @@ entry:
 ; CHECKISEL32-NEXT:   t9: i32 = Register $r1
 ; CHECKISEL32-NEXT:       t0: ch,glue = EntryToken
 ; CHECKISEL32-NEXT:     t8: ch,glue = callseq_start t0, TargetConstant:i32<56>, TargetConstant:i32<0>
-; CHECKISEL32-NEXT:   t11: ch,glue = CopyToReg t8, Register:i32 $r3, Constant:i32<0> 
+; CHECKISEL32-NEXT:   t11: ch,glue = CopyToReg t8, Register:i32 $r3, poison:i32
 ; CHECKISEL32-NEXT:   t13: ch,glue = CopyToReg t11, Register:i32 $r4, Constant:i32<255>, t11:1
 ; CHECKISEL32-NEXT:   t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i32, Register:i32 $r3, Register:i32 $r4, Register:i32 $r2, RegisterMask:Untyped, t13:1
 ; CHECKISEL32-NEXT:     t18: ch,glue = callseq_end t17, TargetConstant:i32<56>, TargetConstant:i32<0>, t17:1
@@ -112,7 +110,6 @@ entry:
 ; CHECKASM32-NEXT: # %bb.0:                                # %entry
 ; CHECKASM32-NEXT:         mflr 0
 ; CHECKASM32-NEXT:         stwu 1, -64(1)
-; CHECKASM32-NEXT:         li 3, 0
 ; CHECKASM32-NEXT:         li 4, 255
 ; CHECKASM32-NEXT:         stw 0, 72(1)
 ; CHECKASM32-NEXT:         bl .bar8
@@ -126,7 +123,6 @@ entry:
 ; CHECKASM64-NEXT: # %bb.0:                                # %entry
 ; CHECKASM64-NEXT:         mflr 0
 ; CHECKASM64-NEXT:         stdu 1, -112(1)
-; CHECKASM64-NEXT:         li 3, 0
 ; CHECKASM64-NEXT:         li 4, 255
 ; CHECKASM64-NEXT:         std 0, 128(1)
 ; CHECKASM64-NEXT:         bl .bar8
@@ -141,12 +137,12 @@ entry:
 ; CHECKISEL64-NEXT:   t1: i64 = GlobalAddress<ptr @bar8> 0
 ; CHECKISEL64-NEXT:   t2: i8 = poison
 ; CHECKISEL64-NEXT:   t3: i8 = Constant<-1>
-; CHECKISEL64-NEXT:   t4: i32 = Constant<0> 
+; CHECKISEL64-NEXT:   t4: i32 = poison
 ; CHECKISEL64-NEXT:   t5: i32 = Constant<255>
 ; CHECKISEL64-NEXT:   t9: i64 = Register $x1
 ; CHECKISEL64-NEXT:       t0: ch,glue = EntryToken
 ; CHECKISEL64-NEXT:     t8: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT:   t13: ch,glue = CopyToReg t8, Register:i64 $x3, Constant:i64<0> 
+; CHECKISEL64-NEXT:   t13: ch,glue = CopyToReg t8, Register:i64 $x3, poison:i64
 ; CHECKISEL64-NEXT:   t15: ch,glue = CopyToReg t13, Register:i64 $x4, Constant:i64<255>, t13:1
 ; CHECKISEL64-NEXT:   t19: ch,glue = PPCISD::CALL_NOP t15, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t15:1
 ; CHECKISEL64-NEXT:     t20: ch,glue = callseq_end t19, TargetConstant:i64<112>, TargetConstant:i64<0>, t19:1
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index a5847365159a8..395fc99ea0536 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -27,14 +27,6 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
 ; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    li a6, 0
-; CHECK-NEXT:    li a7, 0
 ; CHECK-NEXT:    call _Z3fooiiiiiiiiiiPi
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:  .Ltmp1:

>From 828e637ef8f41cd051353e88d63946cda264b2bc Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Apr 2025 20:05:47 +0000
Subject: [PATCH 2/4] change AMDGPU test case

---
 .../CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll    | 23 +++++--------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
index b3d9e61b65b6f..b0627c3d4e77d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
@@ -92,27 +92,17 @@ define i16 @bitcast_f16_to_i16(half %a, i32 %b) {
 ; GCN-LABEL: bitcast_f16_to_i16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v2, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT:    s_cbranch_execnz .LBB1_3
-; GCN-NEXT:  ; %bb.1: ; %Flow
-; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GCN-NEXT:    s_cbranch_execnz .LBB1_4
-; GCN-NEXT:  .LBB1_2: ; %end
-; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-; GCN-NEXT:  .LBB1_3: ; %cmp.false
-; GCN-NEXT:    v_mov_b32_e32 v0, v1
 ; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
 ; GCN-NEXT:    s_cbranch_execz .LBB1_2
-; GCN-NEXT:  .LBB1_4: ; %cmp.true
-; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v1
+; GCN-NEXT:  ; %bb.1:
+; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; GCN-NEXT:    v_add_f32_e32 v0, 0x38000000, v0
 ; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT:  .LBB1_2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -249,10 +239,9 @@ define i16 @bitcast_bf16_to_i16(bfloat %a, i32 %b) {
 ; GCN-LABEL: bitcast_bf16_to_i16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v2, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v2
+; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v0
+; GCN-NEXT:    ; implicit-def: $vgpr0
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_execnz .LBB3_3

>From b0366ed81bca501f95f0f35b4021ed2a127ea59c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 11 Apr 2025 13:30:32 +0000
Subject: [PATCH 3/4] minor change

---
 llvm/test/CodeGen/PowerPC/undef-args.ll | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 277aba40fb271..f636fccda1324 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,4 +1,7 @@
-;; Tests that load 0 into argument registers for unused arguments are eliminated. 
+;; Tests that extending poison results in poison.
+;; Also tests that there are no redundant instructions loading 0 into argument registers for unused arguments.
+
+; REQUIRES: asserts
 
 ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -debug-only=isel \
 ; RUN:   %s -o - 2>&1 | FileCheck --check-prefix=CHECKISEL32 %s

>From dae013d9285f825faca90d4ca594fa53cf12511c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 14 Apr 2025 14:53:12 +0000
Subject: [PATCH 4/4] address comment

---
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h  | 6 ------
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 60d9465ccad93..b62cf08693f63 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -213,7 +213,6 @@ class SDValue {
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
   inline bool isUndef() const;
-  inline bool isPoison() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -698,9 +697,6 @@ END_TWO_BYTE_PACK()
     return NodeType == ISD::UNDEF || NodeType == ISD::POISON;
   }
 
-  /// Returns true if the node type is POISON.
-  bool isPoison() const { return NodeType == ISD::POISON; }
-
   /// Test if this node is a memory intrinsic (with valid pointer information).
   bool isMemIntrinsic() const { return SDNodeBits.IsMemIntrinsic; }
 
@@ -1274,8 +1270,6 @@ inline bool SDValue::isUndef() const {
   return Node->isUndef();
 }
 
-inline bool SDValue::isPoison() const { return Node->isPoison(); }
-
 inline bool SDValue::use_empty() const {
   return !Node->hasAnyUseOfValue(ResNo);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3fcd9443418df..8f8c1167e81a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6242,7 +6242,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
     }
 
-    if (N1.isPoison())
+    if (OpOpcode == ISD::POISON)
       return getPOISON(VT);
 
     if (N1.isUndef())
@@ -6266,7 +6266,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
     }
 
-    if (N1.isPoison())
+    if (OpOpcode == ISD::POISON)
       return getPOISON(VT);
 
     if (N1.isUndef())



More information about the llvm-commits mailing list