[llvm] 4a3708c - [SDAG] remove shift that is redundant with part of funnel shift

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 24 08:26:01 PST 2022


Author: Sanjay Patel
Date: 2022-02-24T11:25:46-05:00
New Revision: 4a3708cd6b062a6afe2697cc8b39329e3f2faa25

URL: https://github.com/llvm/llvm-project/commit/4a3708cd6b062a6afe2697cc8b39329e3f2faa25
DIFF: https://github.com/llvm/llvm-project/commit/4a3708cd6b062a6afe2697cc8b39329e3f2faa25.diff

LOG: [SDAG] remove shift that is redundant with part of funnel shift

This is the SDAG translation of D120253:
https://alive2.llvm.org/ce/z/qHpmNn
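
For reference, here is roughly the IR-level shape of the first fold (a
minimal sketch that mirrors the or_shl_fshl_simplify tests updated below;
the combine itself matches the corresponding ISD::FSHL/ISD::SHL nodes in
visitORCommutative rather than IR):

  declare i32 @llvm.fshl.i32(i32, i32, i32)

  define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
    ; fshl(%y, %x, %s) already contains the bits of (%y << %s),
    ; so the separate shl feeding the or is redundant.
    %shy = shl i32 %y, %s
    %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
    %or = or i32 %fun, %shy
    ret i32 %or
  }

The fshr/srl fold in the patch is the mirror image of this.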

The SDAG nodes can have operand types that differ from the result type.
We can see an example of that on AArch64, where the funnel shift amount
is an i64 rather than an i32.

We may need to make the match even more flexible to handle
post-legalization nodes, but I have not stepped into that yet.
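
As a concrete instance of that mismatch, the AArch64 tests updated below
use an i32 shift amount %s, but at the DAG level the funnel shift amount
operand is an i64 (a ZERO_EXTEND of %s), which is what the peekThroughZext
lambda strips before comparing the two amounts. One way to watch the
combine fire is something like this (a hypothetical invocation; the
-debug-only output requires an assertions-enabled build of llc):

  llc -mtriple=aarch64-- -debug-only=dagcombine \
      < llvm/test/CodeGen/AArch64/funnel-shift.ll -o /dev/null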

Differential Revision: https://reviews.llvm.org/D120264

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/funnel-shift.ll
    llvm/test/CodeGen/RISCV/rv32zbp.ll
    llvm/test/CodeGen/X86/funnel-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3544729da2f71..483496a53a6e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6710,6 +6710,24 @@ static SDValue visitORCommutative(
       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
   }
 
+  auto peekThroughZext = [](SDValue V) {
+    if (V->getOpcode() == ISD::ZERO_EXTEND)
+      return V->getOperand(0);
+    return V;
+  };
+
+  // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+  if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
+      N0.getOperand(0) == N1.getOperand(0) &&
+      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+    return N0;
+
+  // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+  if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
+      N0.getOperand(1) == N1.getOperand(0) &&
+      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+    return N0;
+
   return SDValue();
 }
 

diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index b4b4e37b4cba5..fbf00a59f3cf1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -472,14 +472,12 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
 define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-LABEL: or_shl_fshl_simplify:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w2
-; CHECK-NEXT:    mvn w9, w2
-; CHECK-NEXT:    lsr w10, w0, #1
-; CHECK-NEXT:    lsr w9, w10, w9
-; CHECK-NEXT:    lsl w8, w1, w8
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    mvn w8, w2
+; CHECK-NEXT:    lsr w9, w0, #1
 ; CHECK-NEXT:    lsl w10, w1, w2
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    orr w0, w8, w10
+; CHECK-NEXT:    lsr w8, w9, w8
+; CHECK-NEXT:    orr w0, w10, w8
 ; CHECK-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -490,14 +488,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-LABEL: or_lshr_fshr_simplify:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w2
-; CHECK-NEXT:    mvn w9, w2
-; CHECK-NEXT:    lsl w10, w0, #1
-; CHECK-NEXT:    lsr w8, w1, w8
-; CHECK-NEXT:    lsl w9, w10, w9
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    mvn w8, w2
+; CHECK-NEXT:    lsl w9, w0, #1
 ; CHECK-NEXT:    lsr w10, w1, w2
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    orr w0, w10, w8
+; CHECK-NEXT:    lsl w8, w9, w8
+; CHECK-NEXT:    orr w0, w8, w10
 ; CHECK-NEXT:    ret
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)

diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 76464a5024484..be13bebd44f1e 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -3370,7 +3370,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; RV32I-NEXT:    srli a0, a0, 1
 ; RV32I-NEXT:    srl a0, a0, a2
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: or_shl_fshl_simplify:
@@ -3380,7 +3379,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; RV32ZBP-NEXT:    srli a0, a0, 1
 ; RV32ZBP-NEXT:    srl a0, a0, a2
 ; RV32ZBP-NEXT:    or a0, a1, a0
-; RV32ZBP-NEXT:    or a0, a0, a1
 ; RV32ZBP-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -3396,7 +3394,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: or_lshr_fshr_simplify:
@@ -3406,7 +3403,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; RV32ZBP-NEXT:    slli a0, a0, 1
 ; RV32ZBP-NEXT:    sll a0, a0, a2
 ; RV32ZBP-NEXT:    or a0, a0, a1
-; RV32ZBP-NEXT:    or a0, a1, a0
 ; RV32ZBP-NEXT:    ret
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)

diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 49cf2684c7a82..2e9deb51dbe07 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -1260,25 +1260,18 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_shl_fshl_simplify:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %esi
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movl %eax, %esi
-; X86-SSE2-NEXT:    shll %cl, %esi
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
-; X86-SSE2-NEXT:    orl %esi, %eax
-; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: or_shl_fshl_simplify:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edx, %ecx
 ; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shll %cl, %eax
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edi, %esi
-; X64-AVX2-NEXT:    orl %esi, %eax
+; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
 ; X64-AVX2-NEXT:    retq
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -1289,25 +1282,18 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_lshr_fshr_simplify:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %esi
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movl %eax, %esi
-; X86-SSE2-NEXT:    shrl %cl, %esi
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
-; X86-SSE2-NEXT:    orl %esi, %eax
-; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: or_lshr_fshr_simplify:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edx, %ecx
 ; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrl %cl, %eax
 ; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %esi
-; X64-AVX2-NEXT:    orl %esi, %eax
+; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
 ; X64-AVX2-NEXT:    retq
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)


        

