[llvm] 8b36d08 - [DAG] getNode() - fold (zext (trunc x)) -> x iff the upper bits are known zero - add SRL support
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 24 05:49:55 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-24T13:40:07+01:00
New Revision: 8b36d082c48c81454dcc66d9e70e473ae4b2b7f8
URL: https://github.com/llvm/llvm-project/commit/8b36d082c48c81454dcc66d9e70e473ae4b2b7f8
DIFF: https://github.com/llvm/llvm-project/commit/8b36d082c48c81454dcc66d9e70e473ae4b2b7f8.diff
LOG: [DAG] getNode() - fold (zext (trunc x)) -> x iff the upper bits are known zero - add SRL support
This is part of the work to address the D155472 regressions. There are a number of issues with generalizing this fold, which is why I'm only adding SRL support at the moment.
Differential Revision: https://reviews.llvm.org/D159533
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AMDGPU/ctpop16.ll
llvm/test/CodeGen/AMDGPU/permute_i8.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f12db53c7f08738..cd21af770e1a4d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5695,12 +5695,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(0, DL, VT);
// Skip unnecessary zext_inreg pattern:
- // (zext (trunc (assertzext x))) -> (assertzext x)
- // TODO: Generalize to MaskedValueIsZero check?
+ // (zext (trunc x)) -> x iff the upper bits are known zero.
+ // TODO: Generalize to just the MaskedValueIsZero check?
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = N1.getOperand(0);
if (OpOp.getValueType() == VT) {
- if (OpOp.getOpcode() == ISD::AssertZext) {
+ if (OpOp.getOpcode() == ISD::AssertZext ||
+ OpOp.getOpcode() == ISD::SRL) {
APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
N1.getScalarValueSizeInBits());
if (MaskedValueIsZero(OpOp, HiBits)) {
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 26ccede123601ee..78c657049fcb2ad 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1523,9 +1523,8 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshr_b32 s5, s4, 16
-; VI-NEXT: v_cmp_ne_u16_e64 s[6:7], s5, 0
-; VI-NEXT: s_and_b64 vcc, exec, s[6:7]
-; VI-NEXT: s_cbranch_vccz .LBB14_4
+; VI-NEXT: s_cmp_lg_u32 s5, 0
+; VI-NEXT: s_cbranch_scc0 .LBB14_4
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 6f6452c69fb214c..5f896f92de0f424 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -353,7 +353,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_perm_b32 v0, v0, v0, 0x5040706
+; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT: global_store_dword v[2:3], v0, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -361,9 +361,8 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v[0:1], off
-; GFX9-NEXT: s_mov_b32 s4, 0x5040706
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_perm_b32 v0, v0, v0, s4
+; GFX9-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX9-NEXT: global_store_dword v[2:3], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
More information about the llvm-commits mailing list