[llvm] [SelectionDAG] Folding ZERO_EXTEND/SIGN_EXTEND of poison to a poison value in getNode (PR #122741)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 07:53:31 PDT 2025
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/122741
>From 160b57eb7573af1cb3da381864cc4f48425f0cb1 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 8 Apr 2025 20:48:07 +0000
Subject: [PATCH 1/4] lower zero(sign) extend poison to poison
---
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 6 +++
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++++
.../CodeGen/AArch64/arm64-bitfield-extract.ll | 2 +-
.../CodeGen/AArch64/optimize-cond-branch.ll | 2 +-
.../CodeGen/AArch64/sve-extract-element.ll | 1 -
.../atomic_optimizations_global_pointer.ll | 14 +++----
llvm/test/CodeGen/AMDGPU/ctpop16.ll | 41 +++++++++----------
.../dead-machine-elim-after-dead-lane.ll | 9 ++--
.../CodeGen/AMDGPU/mdt-preserving-crash.ll | 12 ++----
llvm/test/CodeGen/PowerPC/undef-args.ll | 14 +++----
llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll | 8 ----
11 files changed, 57 insertions(+), 60 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index b62cf08693f63..60d9465ccad93 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -213,6 +213,7 @@ class SDValue {
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline bool isUndef() const;
+ inline bool isPoison() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
@@ -697,6 +698,9 @@ END_TWO_BYTE_PACK()
return NodeType == ISD::UNDEF || NodeType == ISD::POISON;
}
+ /// Returns true if the node type is POISON.
+ bool isPoison() const { return NodeType == ISD::POISON; }
+
/// Test if this node is a memory intrinsic (with valid pointer information).
bool isMemIntrinsic() const { return SDNodeBits.IsMemIntrinsic; }
@@ -1270,6 +1274,8 @@ inline bool SDValue::isUndef() const {
return Node->isUndef();
}
+inline bool SDValue::isPoison() const { return Node->isPoison(); }
+
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d6dcb3f15ae7c..3fcd9443418df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6241,6 +6241,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (N1.isPoison())
+ return getPOISON(VT);
+
if (N1.isUndef())
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -6261,6 +6265,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (N1.isPoison())
+ return getPOISON(VT);
+
if (N1.isUndef())
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 62ff4bbcc9c27..4b1fff642e5f5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -1019,7 +1019,7 @@ define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
define void @sameOperandBFI(i64 %src, i64 %src2, ptr %ptr) {
; LLC-LABEL: sameOperandBFI:
; LLC: // %bb.0: // %entry
-; LLC-NEXT: cbnz wzr, .LBB30_2
+; LLC-NEXT: cbnz w8, .LBB30_2
; LLC-NEXT: // %bb.1: // %if.else
; LLC-NEXT: lsr x8, x0, #47
; LLC-NEXT: and w9, w1, #0x3
diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
index fdf972990e745..8a56360935717 100644
--- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -16,7 +16,7 @@ define void @func() uwtable {
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cbnz w8, .LBB0_3
; CHECK-NEXT: // %bb.1: // %b1
-; CHECK-NEXT: cbz wzr, .LBB0_4
+; CHECK-NEXT: cbz w8, .LBB0_4
; CHECK-NEXT: // %bb.2: // %b3
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: and w0, w8, #0x100
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 9ebeb098c60c0..c340df1385124 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -523,7 +523,6 @@ define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_undef_lane_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 poison
ret i32 %b
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 62083b3e67ab6..0701ed85a0609 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -8387,10 +8387,10 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s6, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v1, exec_hi, v1
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB15_2
; GFX7LESS-NEXT: ; %bb.1:
@@ -8735,10 +8735,10 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
; GFX7LESS-NEXT: s_mov_b64 s[6:7], exec
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s10, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, s6, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v0
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_4
; GFX7LESS-NEXT: ; %bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 6bf126af5ade2..fb418afb8b039 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_4:
-; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_branch .LBB14_2
;
; VI-LABEL: ctpop_i16_in_br:
@@ -1329,48 +1329,47 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @20, KC0[], KC1[]
; EG-NEXT: TEX 0 @14
-; EG-NEXT: ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 3, @21, KC0[], KC1[]
; EG-NEXT: JUMP @7 POP:1
-; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @16
-; EG-NEXT: ALU_POP_AFTER 1, @27, KC0[], KC1[]
-; EG-NEXT: ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU_POP_AFTER 1, @26, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 2, @28, KC0[CB0:0-32], KC1[]
; EG-NEXT: JUMP @11 POP:1
; EG-NEXT: TEX 0 @18
-; EG-NEXT: ALU_POP_AFTER 0, @32, KC0[], KC1[]
-; EG-NEXT: ALU 11, @33, KC0[], KC1[]
+; EG-NEXT: ALU_POP_AFTER 0, @31, KC0[], KC1[]
+; EG-NEXT: ALU 11, @32, KC0[], KC1[]
; EG-NEXT: MEM_RAT MSKOR T1.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 14:
-; EG-NEXT: VTX_READ_16 T2.X, T1.X, 46, #3
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
; EG-NEXT: Fetch clause starting at 16:
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
; EG-NEXT: Fetch clause starting at 18:
-; EG-NEXT: VTX_READ_16 T0.X, T1.X, 44, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT: ALU clause starting at 20:
-; EG-NEXT: MOV * T1.X, 0.0,
+; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 21:
-; EG-NEXT: MOV T0.X, literal.x,
-; EG-NEXT: MOV T1.W, literal.y,
-; EG-NEXT: SETNE_INT * T0.W, T2.X, 0.0,
-; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT: MOV T1.W, literal.x,
+; EG-NEXT: SETNE_INT * T0.W, T1.X, 0.0,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 25:
+; EG-NEXT: MOV * T1.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 26:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 27:
; EG-NEXT: MOV * T1.W, literal.x,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: ALU clause starting at 28:
; EG-NEXT: MOV T0.W, KC0[2].Y,
; EG-NEXT: SETE_INT * T1.W, T1.W, 0.0,
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 31:
+; EG-NEXT: BCNT_INT * T1.X, T0.X,
; EG-NEXT: ALU clause starting at 32:
-; EG-NEXT: BCNT_INT * T0.X, T0.X,
-; EG-NEXT: ALU clause starting at 33:
; EG-NEXT: LSHL * T1.W, T0.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T1.X, literal.y,
; EG-NEXT: 24(3.363116e-44), 65535(9.183409e-41)
; EG-NEXT: LSHL T1.X, PS, PV.W,
; EG-NEXT: LSHL * T1.W, literal.x, PV.W,
diff --git a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
index d616fecfdc1ff..f68b035334fd5 100644
--- a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
@@ -6,15 +6,16 @@
define amdgpu_kernel void @foo() {
; CHECK-LABEL: foo:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_cbranch_execnz .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
-; CHECK-NEXT: .LBB0_2: ; %foo.exit
+; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
+; CHECK-NEXT: s_cmp_eq_u32 s0, 10
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
+; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: s_mov_b32 s2, -1
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: ; %bb.3: ; %sw.bb10
+; CHECK-NEXT: .LBB0_3:
entry:
switch i8 poison, label %foo.exit [
i8 4, label %sw.bb4
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
index 3bb840eb51690..4b8ef2c9613a5 100644
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -29,17 +29,13 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: s_cmp_eq_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.3: ; %if.end60
-; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execz .LBB0_11
; CHECK-NEXT: ; %bb.4: ; %if.end5.i
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.5: ; %if.end5.i314
-; CHECK-NEXT: s_mov_b64 vcc, exec
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.6: ; %if.end5.i338
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.7: ; %if.end5.i362
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
@@ -50,7 +46,7 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: buffer_store_byte v0, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:257
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.8: ; %if.end5.i400
; CHECK-NEXT: flat_load_ubyte v0, v[0:1]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 8ae43483f0d11..277aba40fb271 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,5 +1,4 @@
-;; Tests that extending poison results in undef.
-;; Also tests that there are redundant instructions loading 0 into argument registers for unused arguments.
+;; Tests that load 0 into argument registers for unused arguments are eliminated.
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -debug-only=isel \
; RUN: %s -o - 2>&1 | FileCheck --check-prefix=CHECKISEL32 %s
@@ -60,7 +59,7 @@ entry:
; CHECKISEL64-NEXT: t7: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t6: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t11, Register:i64 $x4, Constant:i64<255>, t11:1
; CHECKISEL64-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t13:1
; CHECKISEL64-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i64<112>, TargetConstant:i64<0>, t17:1
@@ -70,7 +69,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar32
@@ -102,7 +100,7 @@ entry:
; CHECKISEL32-NEXT: t9: i32 = Register $r1
; CHECKISEL32-NEXT: t0: ch,glue = EntryToken
; CHECKISEL32-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i32<56>, TargetConstant:i32<0>
-; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, Constant:i32<0>
+; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, poison:i32
; CHECKISEL32-NEXT: t13: ch,glue = CopyToReg t11, Register:i32 $r4, Constant:i32<255>, t11:1
; CHECKISEL32-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i32, Register:i32 $r3, Register:i32 $r4, Register:i32 $r2, RegisterMask:Untyped, t13:1
; CHECKISEL32-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i32<56>, TargetConstant:i32<0>, t17:1
@@ -112,7 +110,6 @@ entry:
; CHECKASM32-NEXT: # %bb.0: # %entry
; CHECKASM32-NEXT: mflr 0
; CHECKASM32-NEXT: stwu 1, -64(1)
-; CHECKASM32-NEXT: li 3, 0
; CHECKASM32-NEXT: li 4, 255
; CHECKASM32-NEXT: stw 0, 72(1)
; CHECKASM32-NEXT: bl .bar8
@@ -126,7 +123,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar8
@@ -141,12 +137,12 @@ entry:
; CHECKISEL64-NEXT: t1: i64 = GlobalAddress<ptr @bar8> 0
; CHECKISEL64-NEXT: t2: i8 = poison
; CHECKISEL64-NEXT: t3: i8 = Constant<-1>
-; CHECKISEL64-NEXT: t4: i32 = Constant<0>
+; CHECKISEL64-NEXT: t4: i32 = poison
; CHECKISEL64-NEXT: t5: i32 = Constant<255>
; CHECKISEL64-NEXT: t9: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t15: ch,glue = CopyToReg t13, Register:i64 $x4, Constant:i64<255>, t13:1
; CHECKISEL64-NEXT: t19: ch,glue = PPCISD::CALL_NOP t15, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t15:1
; CHECKISEL64-NEXT: t20: ch,glue = callseq_end t19, TargetConstant:i64<112>, TargetConstant:i64<0>, t19:1
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index a5847365159a8..395fc99ea0536 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -27,14 +27,6 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: li a7, 0
; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: .Ltmp1:
>From 828e637ef8f41cd051353e88d63946cda264b2bc Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 10 Apr 2025 20:05:47 +0000
Subject: [PATCH 2/4] change AMDGPU test case
---
.../CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll | 23 +++++--------------
1 file changed, 6 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
index b3d9e61b65b6f..b0627c3d4e77d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
@@ -92,27 +92,17 @@ define i16 @bitcast_f16_to_i16(half %a, i32 %b) {
; GCN-LABEL: bitcast_f16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_3
-; GCN-NEXT: ; %bb.1: ; %Flow
-; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_4
-; GCN-NEXT: .LBB1_2: ; %end
-; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: s_setpc_b64 s[30:31]
-; GCN-NEXT: .LBB1_3: ; %cmp.false
-; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB1_2
-; GCN-NEXT: .LBB1_4: ; %cmp.true
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GCN-NEXT: ; %bb.1:
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT: v_add_f32_e32 v0, 0x38000000, v0
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: .LBB1_2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -249,10 +239,9 @@ define i16 @bitcast_bf16_to_i16(bfloat %a, i32 %b) {
; GCN-LABEL: bitcast_bf16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v2
+; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v0
+; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GCN-NEXT: s_cbranch_execnz .LBB3_3
>From b0366ed81bca501f95f0f35b4021ed2a127ea59c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 11 Apr 2025 13:30:32 +0000
Subject: [PATCH 3/4] minor change
---
llvm/test/CodeGen/PowerPC/undef-args.ll | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 277aba40fb271..f636fccda1324 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,4 +1,7 @@
-;; Tests that load 0 into argument registers for unused arguments are eliminated.
+;; Tests that extending poison results in poison.
+;; Also tests that there are no redundant instructions loading 0 into argument registers for unused arguments.
+
+; REQUIRES: asserts
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -debug-only=isel \
; RUN: %s -o - 2>&1 | FileCheck --check-prefix=CHECKISEL32 %s
>From dae013d9285f825faca90d4ca594fa53cf12511c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 14 Apr 2025 14:53:12 +0000
Subject: [PATCH 4/4] address comment
---
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 6 ------
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++--
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 60d9465ccad93..b62cf08693f63 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -213,7 +213,6 @@ class SDValue {
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline bool isUndef() const;
- inline bool isPoison() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
@@ -698,9 +697,6 @@ END_TWO_BYTE_PACK()
return NodeType == ISD::UNDEF || NodeType == ISD::POISON;
}
- /// Returns true if the node type is POISON.
- bool isPoison() const { return NodeType == ISD::POISON; }
-
/// Test if this node is a memory intrinsic (with valid pointer information).
bool isMemIntrinsic() const { return SDNodeBits.IsMemIntrinsic; }
@@ -1274,8 +1270,6 @@ inline bool SDValue::isUndef() const {
return Node->isUndef();
}
-inline bool SDValue::isPoison() const { return Node->isPoison(); }
-
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3fcd9443418df..8f8c1167e81a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6242,7 +6242,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
}
- if (N1.isPoison())
+ if (OpOpcode == ISD::POISON)
return getPOISON(VT);
if (N1.isUndef())
@@ -6266,7 +6266,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
}
- if (N1.isPoison())
+ if (OpOpcode == ISD::POISON)
return getPOISON(VT);
if (N1.isUndef())
More information about the llvm-commits
mailing list