[llvm] d069ac0 - [DAGCombiner] Add bswap(logic_op(bswap(x), y)) optimization

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Tue May 16 16:58:36 PDT 2023


Author: Austin Chang
Date: 2023-05-16T18:58:07-05:00
New Revision: d069ac035add3095c771f49540223f98e5ba10b9

URL: https://github.com/llvm/llvm-project/commit/d069ac035add3095c771f49540223f98e5ba10b9
DIFF: https://github.com/llvm/llvm-project/commit/d069ac035add3095c771f49540223f98e5ba10b9.diff

LOG: [DAGCombiner] Add bswap(logic_op(bswap(x), y)) optimization

This is the implementation of D149782

The patch implements a helper function that matches and folds the following cases in the DAGCombiner:

1. `bswap(logic_op(x, bswap(y))) -> logic_op(bswap(x), y)`
2. `bswap(logic_op(bswap(x), y)) -> logic_op(x, bswap(y))`
3. `bswap(logic_op(bswap(x), bswap(y))) -> logic_op(x, y)` in multiuse case, which still reduces the number of instructions.

The helper function accepts SDValues with the BSWAP and BITREVERSE opcodes. This patch folds the BSWAP cases and leaves the BITREVERSE optimization for the future.

Reviewed By: RKSimon, goldstein.w.n

Differential Revision: https://reviews.llvm.org/D149783

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/ARM/combine-bswap.ll
    llvm/test/CodeGen/X86/combine-bswap.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 26eef55dd9ed8..85da5f4cc0a64 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9981,6 +9981,42 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
                                      : DAG.getZExtOrTrunc(Result, DL, WideVT));
 }
 
+// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
+// This helper function accept SDNode with opcode ISD::BSWAP and ISD::BITREVERSE
+static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
+  unsigned Opcode = N->getOpcode();
+  if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+    SDValue OldLHS = N0.getOperand(0);
+    SDValue OldRHS = N0.getOperand(1);
+
+    // If both operands are bswap/bitreverse, ignore the multiuse
+    // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
+    if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
+      return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+                         OldRHS.getOperand(0));
+    }
+
+    if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
+      SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
+      return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+                         NewBitReorder);
+    }
+
+    if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
+      SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
+      return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
+                         OldRHS.getOperand(0));
+    }
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSRA(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -10766,6 +10802,9 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
     }
   }
 
+  if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
+    return V;
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/ARM/combine-bswap.ll b/llvm/test/CodeGen/ARM/combine-bswap.ll
index f1037f8da12a0..16a1d79de28ad 100644
--- a/llvm/test/CodeGen/ARM/combine-bswap.ll
+++ b/llvm/test/CodeGen/ARM/combine-bswap.ll
@@ -8,9 +8,8 @@ declare i32 @llvm.bitreverse.i32(i32) readnone
 define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: bs_and_lhs_bs32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    rev r1, r1
 ; CHECK-NEXT:    ands r0, r1
-; CHECK-NEXT:    rev r0, r0
 ; CHECK-NEXT:    bx lr
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %2 = and i32 %1, %b
@@ -21,12 +20,10 @@ define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
 define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: bs_or_rhs_bs64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r2, r2
-; CHECK-NEXT:    orrs r1, r2
-; CHECK-NEXT:    rev r2, r1
-; CHECK-NEXT:    rev r1, r3
-; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    rev r1, r0
+; CHECK-NEXT:    rev r1, r1
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    orrs r2, r1
+; CHECK-NEXT:    orr.w r1, r0, r3
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bx lr
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
@@ -38,10 +35,9 @@ define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
 define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: bs_and_all_operand_multiuse:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r1, r1
-; CHECK-NEXT:    rev r0, r0
 ; CHECK-NEXT:    and.w r2, r0, r1
-; CHECK-NEXT:    rev r2, r2
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    rev r1, r1
 ; CHECK-NEXT:    muls r0, r2, r0
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr

diff  --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index a4e7e3aaba95e..1f074c877f3ae 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -261,15 +261,13 @@ define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_and_lhs_bs32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    bswapl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %2 = and i32 %1, %b
@@ -280,22 +278,19 @@ define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
 define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
 ; X86-LABEL: bs_or_lhs_bs64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    bswapl %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_or_lhs_bs64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    bswapq %rax
+; X64-NEXT:    orq %rdi, %rax
 ; X64-NEXT:    retq
   %1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %2 = or i64 %1, %b
@@ -306,22 +301,19 @@ define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
 define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
 ; X86-LABEL: bs_xor_rhs_bs64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    bswapl %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_xor_rhs_bs64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    bswapq %rax
+; X64-NEXT:    xorq %rsi, %rax
 ; X64-NEXT:    retq
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
   %2 = xor i64 %a, %1
@@ -332,25 +324,23 @@ define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
 define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
 ; X86-LABEL: bs_and_all_operand_multiuse:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    bswapl %edx
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_and_all_operand_multiuse:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapl %edi
-; X64-NEXT:    bswapl %esi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    bswapl %eax
-; X64-NEXT:    imull %edi, %esi
+; X64-NEXT:    andl %esi, %edi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    imull %edi, %eax
 ; X64-NEXT:    imull %esi, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)


        


More information about the llvm-commits mailing list