[llvm] cdb7b80 - [DAGCombiner] fold or (xor x, y),? patterns
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 17:28:16 PST 2022
Author: chenglin.bi
Date: 2022-11-23T09:28:10+08:00
New Revision: cdb7b804f665f4d250f6bad3941bcea68024e0a7
URL: https://github.com/llvm/llvm-project/commit/cdb7b804f665f4d250f6bad3941bcea68024e0a7
DIFF: https://github.com/llvm/llvm-project/commit/cdb7b804f665f4d250f6bad3941bcea68024e0a7.diff
LOG: [DAGCombiner] fold or (xor x, y),? patterns
or (xor x, y), x --> or x, y
or (xor x, y), y --> or x, y
or (xor x, y), (and x, y) --> or x, y
or (xor x, y), (or x, y) --> or x, y
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D138401
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/fshl.ll
llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512bw-mask-op.ll
llvm/test/CodeGen/X86/avx512dq-mask-op.ll
llvm/test/CodeGen/X86/combine-sra-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cae40db5896b8..85a92d51d3b37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7004,6 +7004,24 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
}
+ if (N0.getOpcode() == ISD::XOR) {
+ // fold or (xor x, y), x --> or x, y
+ // or (xor x, y), (x and/or y) --> or x, y
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (N00 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (N01 == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+
+ if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
+ }
+ }
+
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index 61a065f19ef3e..ed94e9ac7f3c6 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -704,11 +704,10 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_lshl_b32 s0, s2, 7
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_xor_b32 s1, s0, s3
-; SI-NEXT: s_or_b32 s0, s0, s1
+; SI-NEXT: s_or_b32 s0, s3, s0
; SI-NEXT: s_cmp_eq_u32 s0, 0
; SI-NEXT: s_cselect_b32 s0, s2, s3
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -718,8 +717,7 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshl_b32 s4, s2, 7
-; VI-NEXT: s_xor_b32 s5, s4, s3
-; VI-NEXT: s_or_b32 s4, s4, s5
+; VI-NEXT: s_or_b32 s4, s3, s4
; VI-NEXT: s_cmp_eq_u32 s4, 0
; VI-NEXT: s_cselect_b32 s2, s2, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
@@ -734,8 +732,7 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b32 s4, s2, 7
-; GFX9-NEXT: s_xor_b32 s5, s4, s3
-; GFX9-NEXT: s_or_b32 s4, s4, s5
+; GFX9-NEXT: s_or_b32 s4, s3, s4
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s2, s2, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
@@ -744,15 +741,14 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
;
; R600-LABEL: orxor2or1:
; R600: ; %bb.0:
-; R600-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: LSHL * T0.W, KC0[2].Z, literal.x,
; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00)
-; R600-NEXT: XOR_INT * T1.W, PV.W, KC0[2].W,
-; R600-NEXT: OR_INT * T0.W, T0.W, PV.W,
+; R600-NEXT: OR_INT * T0.W, KC0[2].W, PV.W,
; R600-NEXT: CNDE_INT T0.X, PV.W, KC0[2].Z, KC0[2].W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
@@ -763,8 +759,7 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_lshl_b32 s4, s2, 7
-; GFX10-NEXT: s_xor_b32 s5, s4, s3
-; GFX10-NEXT: s_or_b32 s4, s4, s5
+; GFX10-NEXT: s_or_b32 s4, s3, s4
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s2, s2, s3
; GFX10-NEXT: v_mov_b32_e32 v1, s2
@@ -777,11 +772,10 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b32 s4, s2, 7
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_xor_b32 s5, s4, s3
-; GFX11-NEXT: s_or_b32 s4, s4, s5
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_or_b32 s4, s3, s4
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s2, s2, s3
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
index b01412ba2e666..3b0592388a747 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
@@ -5,17 +5,11 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
-; GCN-NEXT: s_mov_b32 m0, s1
-; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000
-; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0
-; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x
-; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1
-; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
-; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc
-; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1]
-; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
+; GCN-NEXT: v_mov_b32_e32 v0, 0x42640000
+; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
+; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
+; GCN-NEXT: s_and_b64 s[0:1], s[2:3], vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[0:1]
; GCN-NEXT: ; return to shader part epilog
main_body:
%tmp26 = fcmp nsz olt float %arg1, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index f9d6ac8e9db12..81cfa94d8b1f1 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -151,21 +151,14 @@ define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: xorl %esi, %edi
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: mand16:
; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: andl %ecx, %edx
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: ## kill: def $ax killed $ax killed $eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
@@ -181,9 +174,7 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; KNL: ## %bb.0:
; KNL-NEXT: kmovw (%rdi), %k0
; KNL-NEXT: kmovw (%rsi), %k1
-; KNL-NEXT: kandw %k1, %k0, %k2
-; KNL-NEXT: kxorw %k1, %k0, %k0
-; KNL-NEXT: korw %k0, %k2, %k0
+; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
@@ -192,9 +183,7 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; SKX: ## %bb.0:
; SKX-NEXT: kmovw (%rdi), %k0
; SKX-NEXT: kmovw (%rsi), %k1
-; SKX-NEXT: kandw %k1, %k0, %k2
-; SKX-NEXT: kxorw %k1, %k0, %k0
-; SKX-NEXT: korw %k0, %k2, %k0
+; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
@@ -203,9 +192,7 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: kmovw (%rsi), %k1
-; AVX512BW-NEXT: kandw %k1, %k0, %k2
-; AVX512BW-NEXT: kxorw %k1, %k0, %k0
-; AVX512BW-NEXT: korw %k0, %k2, %k0
+; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: retq
@@ -214,9 +201,7 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: kmovw (%rsi), %k1
-; AVX512DQ-NEXT: kandw %k1, %k0, %k2
-; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
-; AVX512DQ-NEXT: korw %k0, %k2, %k0
+; AVX512DQ-NEXT: korw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
@@ -227,9 +212,7 @@ define i16 @mand16_mem(ptr %x, ptr %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw (%ecx), %k0
; X86-NEXT: kmovw (%eax), %k1
-; X86-NEXT: kandw %k1, %k0, %k2
-; X86-NEXT: kxorw %k1, %k0, %k0
-; X86-NEXT: korw %k0, %k2, %k0
+; X86-NEXT: korw %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index fda40ee095897..2a26264483613 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -79,9 +79,7 @@ define i32 @mand32(i32 %x, i32 %y) {
; CHECK-LABEL: mand32:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: xorl %esi, %edi
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: retq
%ma = bitcast i32 %x to <32 x i1>
%mb = bitcast i32 %y to <32 x i1>
@@ -97,9 +95,7 @@ define i32 @mand32_mem(ptr %x, ptr %y) {
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd (%rdi), %k0
; CHECK-NEXT: kmovd (%rsi), %k1
-; CHECK-NEXT: kandd %k1, %k0, %k2
-; CHECK-NEXT: kxord %k1, %k0, %k0
-; CHECK-NEXT: kord %k0, %k2, %k0
+; CHECK-NEXT: kord %k1, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: retq
%ma = load <32 x i1>, ptr %x
@@ -115,9 +111,7 @@ define i64 @mand64(i64 %x, i64 %y) {
; CHECK-LABEL: mand64:
; CHECK: ## %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: andq %rsi, %rax
-; CHECK-NEXT: xorq %rsi, %rdi
-; CHECK-NEXT: orq %rdi, %rax
+; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: retq
%ma = bitcast i64 %x to <64 x i1>
%mb = bitcast i64 %y to <64 x i1>
@@ -133,9 +127,7 @@ define i64 @mand64_mem(ptr %x, ptr %y) {
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq (%rdi), %k0
; CHECK-NEXT: kmovq (%rsi), %k1
-; CHECK-NEXT: kandq %k1, %k0, %k2
-; CHECK-NEXT: kxorq %k1, %k0, %k0
-; CHECK-NEXT: korq %k0, %k2, %k0
+; CHECK-NEXT: korq %k1, %k0, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: retq
%ma = load <64 x i1>, ptr %x
diff --git a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
index d33751644a9e1..041a86aff53fb 100644
--- a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
@@ -33,9 +33,8 @@ define i8 @mand8(i8 %x, i8 %y) {
; CHECK-LABEL: mand8:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: andb %sil, %al
-; CHECK-NEXT: xorb %sil, %dil
-; CHECK-NEXT: orb %dil, %al
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%ma = bitcast i8 %x to <8 x i1>
%mb = bitcast i8 %y to <8 x i1>
@@ -51,9 +50,7 @@ define i8 @mand8_mem(ptr %x, ptr %y) {
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovb (%rdi), %k0
; CHECK-NEXT: kmovb (%rsi), %k1
-; CHECK-NEXT: kandb %k1, %k0, %k2
-; CHECK-NEXT: kxorb %k1, %k0, %k0
-; CHECK-NEXT: korb %k0, %k2, %k0
+; CHECK-NEXT: korb %k1, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
index 4606f38332594..aa175f07154e2 100644
--- a/llvm/test/CodeGen/X86/combine-sra-load.ll
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -93,11 +93,11 @@ define i32 @sra_to_sextload_multiple_sra_uses(ptr %p) {
; CHECK-NEXT: movswl 2(%rdi), %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: xorl $6, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: retq
%load = load i32, ptr %p
%shift = ashr i32 %load, 16
%use1 = xor i32 %shift, 6
- %use2 = or i32 %shift, %use1
+ %use2 = mul i32 %shift, %use1
ret i32 %use2
}
More information about the llvm-commits mailing list