[llvm] [DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) -> srl/shl(x,y) (PR #89897)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 24 04:04:26 PDT 2024


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/89897

From 9402531d2d10a94bac1d729e262c20a6d6ce9ae8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Apr 2024 10:15:09 +0100
Subject: [PATCH] [DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) ->
 srl/shl(x,y)

Noticed while investigating GFNI per-element vector shifts (we can currently form SHL but not SRL/SRA).

Alive2: https://alive2.llvm.org/ce/z/fSH-rf
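
As a quick sanity check of both identities (not part of this patch), a minimal standalone C++ sketch that exhaustively verifies them for an i8 value; the Alive2 proof above covers the general case. bitreverse8 here is a hand-rolled reference implementation, not an LLVM API.

#include <cassert>
#include <cstdint>

// Reference implementation: reverse the bits of an 8-bit value.
static uint8_t bitreverse8(uint8_t V) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((V >> I) & 1u) << (7 - I);
  return R;
}

int main() {
  // Exhaustively check for all i8 values and shift amounts:
  //   bitreverse(lshr(bitreverse(x), y)) == shl(x, y)
  //   bitreverse(shl(bitreverse(x), y))  == lshr(x, y)
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 8; ++Y) {
      uint8_t A = (uint8_t)X;
      assert(bitreverse8((uint8_t)(bitreverse8(A) >> Y)) == (uint8_t)(A << Y));
      assert(bitreverse8((uint8_t)(bitreverse8(A) << Y)) == (uint8_t)(A >> Y));
    }
  }
  return 0;
}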
---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    |   5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  14 +
 llvm/test/CodeGen/RISCV/bitreverse-shift.ll   | 139 ++------
 llvm/test/CodeGen/X86/combine-bitreverse.ll   | 296 +-----------------
 4 files changed, 57 insertions(+), 397 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..7a0141d7c80763 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -616,6 +616,11 @@ inline UnaryOpc_match<Opnd, true> m_ChainedUnaryOp(unsigned Opc,
   return UnaryOpc_match<Opnd, true>(Opc, Op);
 }
 
+template <typename Opnd>
+inline UnaryOpc_match<Opnd> m_BitReverse(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::BITREVERSE, Op);
+}
+
 template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
   return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd265b12d73ca4..d1176bd937b0b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10950,9 +10950,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
   // fold (bitreverse c1) -> c2
   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
     return C;
+
   // fold (bitreverse (bitreverse x)) -> x
   if (N0.getOpcode() == ISD::BITREVERSE)
     return N0.getOperand(0);
+
+  SDValue X, Y;
+
+  // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+      sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+    return DAG.getNode(ISD::SHL, DL, VT, X, Y);
+
+  // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
+      sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+    return DAG.getNode(ISD::SRL, DL, VT, X, Y);
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
index b0281ba7d23854..92610f22c4b725 100644
--- a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV32ZBKB
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32ZBKB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV64ZBKB
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBKB
 
-; TODO: These tests can be optimised
+; These tests can be optimised
 ;       fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
 ;       fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
 
@@ -14,25 +14,10 @@ declare i32 @llvm.bitreverse.i32(i32)
 declare i64 @llvm.bitreverse.i64(i64)
 
 define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 27
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
-; RV32ZBKB-NEXT:    ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 59
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
-; RV64ZBKB-NEXT:    ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
     %1 = call i8 @llvm.bitreverse.i8(i8 %a)
     %2 = lshr i8 %1, 3
     %3 = call i8 @llvm.bitreverse.i8(i8 %2)
@@ -40,25 +25,10 @@ define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
 }
 
 define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 23
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
-; RV32ZBKB-NEXT:    ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 55
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
-; RV64ZBKB-NEXT:    ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 7
+; CHECK-NEXT:    ret
     %1 = call i16 @llvm.bitreverse.i16(i16 %a)
     %2 = lshr i16 %1, 7
     %3 = call i16 @llvm.bitreverse.i16(i16 %2)
@@ -68,21 +38,12 @@ define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 15
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    slli a0, a0, 15
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 47
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    slliw a0, a0, 15
 ; RV64ZBKB-NEXT:    ret
     %1 = call i32 @llvm.bitreverse.i32(i32 %a)
     %2 = lshr i32 %1, 15
@@ -93,21 +54,13 @@ define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 1
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a1, a0
+; RV32ZBKB-NEXT:    slli a1, a0, 1
 ; RV32ZBKB-NEXT:    li a0, 0
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 33
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    slli a0, a0, 33
 ; RV64ZBKB-NEXT:    ret
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = lshr i64 %1, 33
@@ -118,24 +71,14 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
-; RV32ZBKB-NEXT:    slli a0, a0, 3
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
+; RV32ZBKB-NEXT:    slli a0, a0, 24
+; RV32ZBKB-NEXT:    srli a0, a0, 27
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
-; RV64ZBKB-NEXT:    slli a0, a0, 3
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
+; RV64ZBKB-NEXT:    slli a0, a0, 56
+; RV64ZBKB-NEXT:    srli a0, a0, 59
 ; RV64ZBKB-NEXT:    ret
     %1 = call i8 @llvm.bitreverse.i8(i8 %a)
     %2 = shl i8 %1, 3
@@ -146,24 +89,14 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
 define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
-; RV32ZBKB-NEXT:    slli a0, a0, 7
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    slli a0, a0, 16
+; RV32ZBKB-NEXT:    srli a0, a0, 23
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
-; RV64ZBKB-NEXT:    slli a0, a0, 7
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBKB-NEXT:    srli a0, a0, 55
 ; RV64ZBKB-NEXT:    ret
     %1 = call i16 @llvm.bitreverse.i16(i16 %a)
     %2 = shl i16 %1, 7
@@ -174,22 +107,12 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    slli a0, a0, 15
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 15
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
-; RV64ZBKB-NEXT:    slli a0, a0, 15
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    srliw a0, a0, 15
 ; RV64ZBKB-NEXT:    ret
     %1 = call i32 @llvm.bitreverse.i32(i32 %a)
     %2 = shl i32 %1, 15
@@ -200,21 +123,13 @@ define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a1
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    slli a0, a0, 1
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a1, 1
 ; RV32ZBKB-NEXT:    li a1, 0
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    slli a0, a0, 33
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 33
 ; RV64ZBKB-NEXT:    ret
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = shl i64 %1, 33
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 9f81fab54a49d0..f3d4d691b453ba 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -39,86 +39,18 @@ define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
   ret i32 %c
 }
 
-; TODO: fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
+; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
 define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
 ; X86-LABEL: test_bitreverse_srli_bitreverse:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655680, %eax # imm = 0x55555500
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    shrl $7, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993424, %ecx # imm = 0x33333310
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993408, %eax # imm = 0x33333300
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    shll $7, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_srli_bitreverse:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    bswapl %edi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
-; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT:    leal (%rdi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655680, %eax # imm = 0x55555500
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    shrl $7, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
-; X64-NEXT:    shll $4, %ecx
-; X64-NEXT:    shrl $4, %eax
-; X64-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
-; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $858993424, %ecx # imm = 0x33333310
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $858993408, %eax # imm = 0x33333300
-; X64-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    shll $7, %eax
 ; X64-NEXT:    retq
   %b = call i32 @llvm.bitreverse.i32(i32 %a0)
   %c = lshr i32 %b, 7
@@ -129,88 +61,15 @@ define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
 define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 ; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655764, %eax # imm = 0x55555554
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645127, %eax # imm = 0xF0F0F07
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993457, %eax # imm = 0x33333331
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    addl %edx, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapq %rdi
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    andq %rcx, %rdi
-; X64-NEXT:    shlq $4, %rdi
-; X64-NEXT:    orq %rax, %rdi
-; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT:    movq %rdi, %rcx
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $2, %rdi
-; X64-NEXT:    andq %rax, %rdi
-; X64-NEXT:    leaq (%rdi,%rcx,4), %rax
-; X64-NEXT:    movabsq $6148914689804861440, %rcx # imm = 0x5555555500000000
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    movabsq $6148914685509894144, %rdx # imm = 0x5555555400000000
-; X64-NEXT:    andq %rax, %rdx
-; X64-NEXT:    leaq (%rdx,%rcx,2), %rax
-; X64-NEXT:    shrq $33, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    movabsq $1085102592318504960, %rcx # imm = 0xF0F0F0F00000000
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102557958766592, %rdx # imm = 0xF0F0F0700000000
-; X64-NEXT:    andq %rax, %rdx
-; X64-NEXT:    shlq $4, %rcx
-; X64-NEXT:    orq %rdx, %rcx
-; X64-NEXT:    movabsq $3689348813882916864, %rax # imm = 0x3333333300000000
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    shrq $2, %rcx
-; X64-NEXT:    movabsq $3689348805292982272, %rdx # imm = 0x3333333100000000
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    leaq (%rdx,%rax,4), %rax
-; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT:    movq %rax, %rdx
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NEXT:    shlq $33, %rax
 ; X64-NEXT:    retq
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = lshr i64 %1, 33
@@ -218,86 +77,18 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
     ret i64 %3
 }
 
-; TODO: fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
+; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
 define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
 ; X86-LABEL: test_bitreverse_shli_bitreverse:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl $5592405, %eax # imm = 0x555555
-; X86-NEXT:    shll $6, %ecx
-; X86-NEXT:    andl $-1431655808, %ecx # imm = 0xAAAAAA80
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $986895, %ecx # imm = 0xF0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $135204623, %eax # imm = 0x80F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $3355443, %ecx # imm = 0x333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $36909875, %eax # imm = 0x2333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    shrl $7, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_shli_bitreverse:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    bswapl %edi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
-; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT:    leal (%rdi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $5592405, %ecx # imm = 0x555555
-; X64-NEXT:    shll $6, %eax
-; X64-NEXT:    andl $-1431655808, %eax # imm = 0xAAAAAA80
-; X64-NEXT:    shll $8, %ecx
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    andl $986895, %eax # imm = 0xF0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %ecx
-; X64-NEXT:    andl $135204623, %ecx # imm = 0x80F0F0F
-; X64-NEXT:    orl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    andl $3355443, %eax # imm = 0x333333
-; X64-NEXT:    shrl $2, %ecx
-; X64-NEXT:    andl $36909875, %ecx # imm = 0x2333333
-; X64-NEXT:    leal (%rcx,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    shrl $7, %eax
 ; X64-NEXT:    retq
   %b = call i32 @llvm.bitreverse.i32(i32 %a0)
   %c = shl i32 %b, 7
@@ -309,79 +100,14 @@ define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
 ; X86-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $357913941, %ecx # imm = 0x15555555
-; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $235867919, %ecx # imm = 0xE0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $590558003, %ecx # imm = 0x23333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
 ; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapq %rdi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
-; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT:    leal (%rdi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $357913941, %ecx # imm = 0x15555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    shlq $33, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $235867919, %ecx # imm = 0xE0F0F0F
-; X64-NEXT:    shll $4, %ecx
-; X64-NEXT:    shrl $4, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $590558003, %ecx # imm = 0x23333333
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $357913941, %ecx # imm = 0x15555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $33, %rax
 ; X64-NEXT:    retq
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = shl i64 %1, 33


