[PATCH] D127598: [DAG] SimplifyDemandedBits OR(Op0, Op1) - attempt to simplify Op1 again once we have the known bits of Op0
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 12 07:05:41 PDT 2022
RKSimon created this revision.
RKSimon added reviewers: foad, spatel, craig.topper.
Herald added subscribers: kosarev, StephenFan, kerbowa, hiraditya, jvesely.
Herald added a project: All.
RKSimon requested review of this revision.
Herald added a project: LLVM.
Noticed this while investigating the regressions in D125836 <https://reviews.llvm.org/D125836>.
@foad You tried something similar in D87464 <https://reviews.llvm.org/D87464>, but that patch wasn't accounting for the already demanded bits of Op0. The AND(Op0,Op1) case might need similar handling.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D127598
Files:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/permute.ll
Index: llvm/test/CodeGen/AMDGPU/permute.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/permute.ll
+++ llvm/test/CodeGen/AMDGPU/permute.ll
@@ -191,23 +191,20 @@
ret void
}
-; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v3, 0xffff0500
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
-; GCN-NEXT: s_and_b32 s0, s0, 0xff00
-; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2
-; GCN-NEXT: v_or_b32_e32 v2, s0, v2
+; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
@@ -277,30 +274,27 @@
ret void
}
-; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff0500:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004
+; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500
+; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v4, v[0:1]
; GCN-NEXT: s_bitset1_b32 s0, 15
-; GCN-NEXT: s_and_b32 s0, s0, 0xff00
-; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_or_b32_e32 v4, 4, v4
-; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4
-; GCN-NEXT: v_or_b32_e32 v4, s0, v4
+; GCN-NEXT: v_perm_b32 v4, s0, v4, v5
; GCN-NEXT: flat_store_dword v[0:1], v4
-; GCN-NEXT: flat_store_dword v[2:3], v5
+; GCN-NEXT: flat_store_dword v[2:3], v6
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1413,11 +1413,21 @@
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
- Known2, TLO, Depth + 1))
+
+ APInt DemandedBits0 = ~Known.One & DemandedBits;
+ if (SimplifyDemandedBits(Op0, DemandedBits0, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Try Op1 again now we have the KnownBits of Op0.
+ APInt DemandedBits1 = ~(Known2.One & DemandedBits0) & DemandedBits;
+ if (DemandedBits1 != DemandedBits)
+ if (SimplifyDemandedBits(Op1, DemandedBits1, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D127598.436222.patch
Type: text/x-patch
Size: 3903 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220612/973c724f/attachment.bin>
More information about the llvm-commits mailing list