[llvm] e9302bf - [SDAG] try harder to remove a rotate from X == 0

Thu Mar 3 06:26:00 PST 2022

Author: Sanjay Patel
Date: 2022-03-03T09:25:46-05:00
New Revision: e9302bf7efc70a26cfa620ffaa614dc70cdb0259

URL: https://github.com/llvm/llvm-project/commit/e9302bf7efc70a26cfa620ffaa614dc70cdb0259
DIFF: https://github.com/llvm/llvm-project/commit/e9302bf7efc70a26cfa620ffaa614dc70cdb0259.diff

LOG: [SDAG] try harder to remove a rotate from X == 0

https://alive2.llvm.org/ce/z/mJP7XP

This can be viewed as expanding the compare into and/or-of-compares:
https://alive2.llvm.org/ce/z/bkZYWE
followed by reduction of each compare.

This could be extended in several ways:
1. There's a sibling fold for (X & Y) == -1.
2. We can recurse through more than 1 'or'.
3. The fold could be generalized beyond rotates to any operation that
   only changes the order of bits (bswap, bitreverse).

This is a transform noted in D111530.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/X86/legalize-shift.ll
    llvm/test/CodeGen/X86/setcc-fsh.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 18016e93fbdd0..093fe58106b26 100644

--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3835,6 +3835,24 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
   if (SDValue R = getRotateSource(N0))
     return DAG.getSetCC(dl, VT, R, N1, Cond);
 
+  // Peek through an 'or' of a rotated value compared against 0:
+  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
+  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
+  //
+  // TODO: Add the 'and' with -1 sibling.
+  // TODO: Recurse through a series of 'or' ops to find the rotate.
+  EVT OpVT = N0.getValueType();
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
+    if (SDValue R = getRotateSource(N0.getOperand(0))) {
+      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
+      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+    }
+    if (SDValue R = getRotateSource(N0.getOperand(1))) {
+      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
+      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+    }
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/X86/legalize-shift.ll b/llvm/test/CodeGen/X86/legalize-shift.ll
index 8113311134ab3..bc5764e586dec 100644
--- a/llvm/test/CodeGen/X86/legalize-shift.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift.ll
@@ -5,16 +5,18 @@
 define void @PR36250() nounwind {
 ; X86-LABEL: PR36250:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    roll %ecx
-; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    orl %ecx, %edx
-; X86-NEXT:    orl %ecx, %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    leal (%eax,%eax), %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    orl %eax, %esi
 ; X86-NEXT:    sete (%eax)
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR36250:
@@ -22,11 +24,10 @@ define void @PR36250() nounwind {
 ; X64-NEXT:    movq (%rax), %rax
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    rolq %rcx
-; X64-NEXT:    addq %rax, %rax
-; X64-NEXT:    movq %rcx, %rdx
-; X64-NEXT:    orq %rcx, %rdx
-; X64-NEXT:    orq %rax, %rdx
-; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    leaq (%rax,%rax), %rdx
+; X64-NEXT:    orq %rcx, %rcx
+; X64-NEXT:    orq %rdx, %rcx
+; X64-NEXT:    orq %rax, %rcx
 ; X64-NEXT:    sete (%rax)
 ; X64-NEXT:    retq
    %1 = load i448, i448* undef

diff  --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll
index f42f1ea5a96bf..a345cf30f9d2e 100644
--- a/llvm/test/CodeGen/X86/setcc-fsh.ll
+++ b/llvm/test/CodeGen/X86/setcc-fsh.ll
@@ -188,9 +188,6 @@ define i1 @fshl_eq_n1(i8 %x, i8 %y, i8 %z) nounwind {
 define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: or_rotl_eq_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    rolb %cl, %dil
 ; CHECK-NEXT:    orb %sil, %dil
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
@@ -203,9 +200,6 @@ define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
 define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
 ; CHECK-LABEL: or_rotr_ne_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
-; CHECK-NEXT:    rorq %cl, %rdi
 ; CHECK-NEXT:    orq %rsi, %rdi
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
@@ -215,6 +209,8 @@ define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
   ret i1 %r
 }
 
+; negative test - wrong constant
+
 define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-LABEL: or_rotl_ne_n1:
 ; CHECK:       # %bb.0:
@@ -231,6 +227,8 @@ define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
   ret i1 %r
 }
 
+; negative test - extra use
+
 define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
 ; CHECK-LABEL: or_rotl_ne_0_use:
 ; CHECK:       # %bb.0:
@@ -254,25 +252,9 @@ define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
 define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: or_rotl_ne_eq0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [31,31,31,31]
-; CHECK-NEXT:    pand %xmm1, %xmm2
-; CHECK-NEXT:    pslld $23, %xmm2
-; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; CHECK-NEXT:    cvttps2dq %xmm2, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pmuludq %xmm2, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pmuludq %xmm3, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK-NEXT:    por %xmm1, %xmm4
-; CHECK-NEXT:    por %xmm0, %xmm4
-; CHECK-NEXT:    pxor %xmm0, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm4, %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm0
 ; CHECK-NEXT:    retq
   %rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
   %or = or <4 x i32> %y, %rot