[llvm] e9302bf - [SDAG] try harder to remove a rotate from X == 0
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 3 06:26:00 PST 2022
Author: Sanjay Patel
Date: 2022-03-03T09:25:46-05:00
New Revision: e9302bf7efc70a26cfa620ffaa614dc70cdb0259
URL: https://github.com/llvm/llvm-project/commit/e9302bf7efc70a26cfa620ffaa614dc70cdb0259
DIFF: https://github.com/llvm/llvm-project/commit/e9302bf7efc70a26cfa620ffaa614dc70cdb0259.diff
LOG: [SDAG] try harder to remove a rotate from X == 0
https://alive2.llvm.org/ce/z/mJP7XP
This can be viewed as expanding the compare into and/or-of-compares:
https://alive2.llvm.org/ce/z/bkZYWE
followed by reduction of each compare.
This could be extended in several ways:
1. There's a sibling fold for 'and' compared against all-ones: (X & Y) == -1.
2. We can recurse through more than one 'or'.
3. The fold could be generalized beyond rotates to any operation that
only changes the order of bits (e.g. bswap, bitreverse).
This is a transform noted in D111530.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/legalize-shift.ll
llvm/test/CodeGen/X86/setcc-fsh.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 18016e93fbdd0..093fe58106b26 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3835,6 +3835,24 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
if (SDValue R = getRotateSource(N0))
return DAG.getSetCC(dl, VT, R, N1, Cond);
+ // Peek through an 'or' of a rotated value compared against 0:
+ // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
+ // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
+ //
+ // TODO: Add the 'and' with -1 sibling.
+ // TODO: Recurse through a series of 'or' ops to find the rotate.
+ EVT OpVT = N0.getValueType();
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
+ if (SDValue R = getRotateSource(N0.getOperand(0))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (SDValue R = getRotateSource(N0.getOperand(1))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/legalize-shift.ll b/llvm/test/CodeGen/X86/legalize-shift.ll
index 8113311134ab3..bc5764e586dec 100644
--- a/llvm/test/CodeGen/X86/legalize-shift.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift.ll
@@ -5,16 +5,18 @@
define void @PR36250() nounwind {
; X86-LABEL: PR36250:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: roll %ecx
-; X86-NEXT: addl %eax, %eax
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: leal (%eax,%eax), %edx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: orl %eax, %esi
; X86-NEXT: sete (%eax)
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: PR36250:
@@ -22,11 +24,10 @@ define void @PR36250() nounwind {
; X64-NEXT: movq (%rax), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: rolq %rcx
-; X64-NEXT: addq %rax, %rax
-; X64-NEXT: movq %rcx, %rdx
-; X64-NEXT: orq %rcx, %rdx
-; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: leaq (%rax,%rax), %rdx
+; X64-NEXT: orq %rcx, %rcx
+; X64-NEXT: orq %rdx, %rcx
+; X64-NEXT: orq %rax, %rcx
; X64-NEXT: sete (%rax)
; X64-NEXT: retq
%1 = load i448, i448* undef
diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll
index f42f1ea5a96bf..a345cf30f9d2e 100644
--- a/llvm/test/CodeGen/X86/setcc-fsh.ll
+++ b/llvm/test/CodeGen/X86/setcc-fsh.ll
@@ -188,9 +188,6 @@ define i1 @fshl_eq_n1(i8 %x, i8 %y, i8 %z) nounwind {
define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
; CHECK-LABEL: or_rotl_eq_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %ecx
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: rolb %cl, %dil
; CHECK-NEXT: orb %sil, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
@@ -203,9 +200,6 @@ define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-LABEL: or_rotr_ne_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT: rorq %cl, %rdi
; CHECK-NEXT: orq %rsi, %rdi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
@@ -215,6 +209,8 @@ define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
ret i1 %r
}
+; negative test - wrong constant
+
define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: or_rotl_ne_n1:
; CHECK: # %bb.0:
@@ -231,6 +227,8 @@ define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
ret i1 %r
}
+; negative test - extra use
+
define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: or_rotl_ne_0_use:
; CHECK: # %bb.0:
@@ -254,25 +252,9 @@ define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: or_rotl_ne_eq0:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
-; CHECK-NEXT: pand %xmm1, %xmm2
-; CHECK-NEXT: pslld $23, %xmm2
-; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; CHECK-NEXT: cvttps2dq %xmm2, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-NEXT: pmuludq %xmm2, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT: pmuludq %xmm3, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-NEXT: por %xmm1, %xmm4
-; CHECK-NEXT: por %xmm0, %xmm4
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm4, %xmm0
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: retq
%rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
%or = or <4 x i32> %y, %rot
More information about the llvm-commits mailing list