[llvm] [DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) -> srl/shl(x,y) (PR #89897)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 02:16:55 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/89897
Noticed while investigating GFNI per-element vector shifts (we can currently form SHL, but not SRL/SRA).
Alive2: https://alive2.llvm.org/ce/z/fSH-rf
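As a quick illustration, here is the lshr form of the fold on one of the i32 test cases updated below (a sketch in IR terms; the combine itself matches SelectionDAG nodes in visitBITREVERSE):

  ; before
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = lshr i32 %b, 7
  %d = call i32 @llvm.bitreverse.i32(i32 %c)

  ; after - the whole chain folds to a single shift
  %d = shl i32 %a0, 7

Reversing the bits turns a right shift into a left shift of the original value, and the second bitreverse undoes the first. The shl form is symmetric (bitreverse(shl(bitreverse(x), y)) -> lshr(x, y)), and both folds are guarded on the replacement shift opcode being legal (or on running before legalization).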
From 266e085066ebaf1004288326c69b5252ab316437 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Apr 2024 10:15:09 +0100
Subject: [PATCH] [DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) ->
srl/shl(x,y)
Noticed while investigating GFNI per-element vector shifts (we can currently form SHL, but not SRL/SRA).
Alive2: https://alive2.llvm.org/ce/z/fSH-rf
---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    |   4 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  14 +
 llvm/test/CodeGen/RISCV/bitreverse-shift.ll   | 139 ++------
 llvm/test/CodeGen/X86/combine-bitreverse.ll   | 296 +-----------------
 4 files changed, 56 insertions(+), 397 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..2b5ef68445b768 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -616,6 +616,10 @@ inline UnaryOpc_match<Opnd, true> m_ChainedUnaryOp(unsigned Opc,
return UnaryOpc_match<Opnd, true>(Opc, Op);
}
+template <typename Opnd>
+inline UnaryOpc_match<Opnd> m_BitReverse(const Opnd &Op) {
+ return UnaryOpc_match<Opnd>(ISD::BITREVERSE, Op);
+}
+
template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd265b12d73ca4..d1176bd937b0b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10950,9 +10950,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
// fold (bitreverse c1) -> c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
return C;
+
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
+
+ SDValue X, Y;
+
+ // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+ sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SHL, DL, VT, X, Y);
+
+ // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
+ sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SRL, DL, VT, X, Y);
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
index f29b1699172626..704ca458535249 100644
--- a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV32ZBKB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBKB
; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV64ZBKB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBKB
-; TODO: These tests can be optmised
+; These tests can be optimised
; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
@@ -14,25 +14,10 @@ declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 27
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
-; RV32ZBKB-NEXT: ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 59
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
-; RV64ZBKB-NEXT: ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: ret
%1 = call i8 @llvm.bitreverse.i8(i8 %a)
%2 = lshr i8 %1, 3
%3 = call i8 @llvm.bitreverse.i8(i8 %2)
@@ -40,25 +25,10 @@ define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
}
define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 23
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
-; RV32ZBKB-NEXT: ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 55
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
-; RV64ZBKB-NEXT: ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 7
+; CHECK-NEXT: ret
%1 = call i16 @llvm.bitreverse.i16(i16 %a)
%2 = lshr i16 %1, 7
%3 = call i16 @llvm.bitreverse.i16(i16 %2)
@@ -68,21 +38,12 @@ define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 15
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: slli a0, a0, 15
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 47
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: slliw a0, a0, 15
; RV64ZBKB-NEXT: ret
%1 = call i32 @llvm.bitreverse.i32(i32 %a)
%2 = lshr i32 %1, 15
@@ -93,21 +54,13 @@ define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 1
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a1, a0
+; RV32ZBKB-NEXT: slli a1, a0, 1
; RV32ZBKB-NEXT: li a0, 0
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 33
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: slli a0, a0, 33
; RV64ZBKB-NEXT: ret
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = lshr i64 %1, 33
@@ -118,24 +71,14 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
-; RV32ZBKB-NEXT: slli a0, a0, 3
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
+; RV32ZBKB-NEXT: slli a0, a0, 24
+; RV32ZBKB-NEXT: srli a0, a0, 27
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
-; RV64ZBKB-NEXT: slli a0, a0, 3
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
+; RV64ZBKB-NEXT: slli a0, a0, 56
+; RV64ZBKB-NEXT: srli a0, a0, 59
; RV64ZBKB-NEXT: ret
%1 = call i8 @llvm.bitreverse.i8(i8 %a)
%2 = shl i8 %1, 3
@@ -146,24 +89,14 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
-; RV32ZBKB-NEXT: slli a0, a0, 7
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: slli a0, a0, 16
+; RV32ZBKB-NEXT: srli a0, a0, 23
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
-; RV64ZBKB-NEXT: slli a0, a0, 7
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: slli a0, a0, 48
+; RV64ZBKB-NEXT: srli a0, a0, 55
; RV64ZBKB-NEXT: ret
%1 = call i16 @llvm.bitreverse.i16(i16 %a)
%2 = shl i16 %1, 7
@@ -174,22 +107,12 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: slli a0, a0, 15
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 15
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
-; RV64ZBKB-NEXT: slli a0, a0, 15
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: srliw a0, a0, 15
; RV64ZBKB-NEXT: ret
%1 = call i32 @llvm.bitreverse.i32(i32 %a)
%2 = shl i32 %1, 15
@@ -200,21 +123,13 @@ define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a1
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: slli a0, a0, 1
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a1, 1
; RV32ZBKB-NEXT: li a1, 0
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: slli a0, a0, 33
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 33
; RV64ZBKB-NEXT: ret
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = shl i64 %1, 33
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 9f81fab54a49d0..f3d4d691b453ba 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -39,86 +39,18 @@ define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
ret i32 %c
}
-; TODO: fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
+; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655744, %ecx # imm = 0x55555540
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655680, %eax # imm = 0x55555500
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: shrl $7, %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645121, %ecx # imm = 0xF0F0F01
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645120, %eax # imm = 0xF0F0F00
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993424, %ecx # imm = 0x33333310
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: shll $7, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %edi
-; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT: orl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: leal (%rdi,%rax,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $1431655744, %ecx # imm = 0x55555540
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655680, %eax # imm = 0x55555500
-; X64-NEXT: leal (%rax,%rcx,2), %eax
-; X64-NEXT: shrl $7, %eax
-; X64-NEXT: bswapl %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $252645121, %ecx # imm = 0xF0F0F01
-; X64-NEXT: shll $4, %ecx
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: andl $252645120, %eax # imm = 0xF0F0F00
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $858993424, %ecx # imm = 0x33333310
-; X64-NEXT: shrl $2, %eax
-; X64-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X64-NEXT: leal (%rax,%rcx,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: leal (%rax,%rcx,2), %eax
+; X64-NEXT: shll $7, %eax
; X64-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a0)
%c = lshr i32 %b, 7
@@ -129,88 +61,15 @@ define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655764, %eax # imm = 0x55555554
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: shrl %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645127, %eax # imm = 0xF0F0F07
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993457, %eax # imm = 0x33333331
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: addl %edx, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
; X64: # %bb.0:
-; X64-NEXT: bswapq %rdi
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: andq %rcx, %rdi
-; X64-NEXT: shlq $4, %rdi
-; X64-NEXT: orq %rax, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rax, %rdi
-; X64-NEXT: leaq (%rdi,%rcx,4), %rax
-; X64-NEXT: movabsq $6148914689804861440, %rcx # imm = 0x5555555500000000
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914685509894144, %rdx # imm = 0x5555555400000000
-; X64-NEXT: andq %rax, %rdx
-; X64-NEXT: leaq (%rdx,%rcx,2), %rax
-; X64-NEXT: shrq $33, %rax
-; X64-NEXT: bswapq %rax
-; X64-NEXT: movabsq $1085102592318504960, %rcx # imm = 0xF0F0F0F00000000
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: movabsq $1085102557958766592, %rdx # imm = 0xF0F0F0700000000
-; X64-NEXT: andq %rax, %rdx
-; X64-NEXT: shlq $4, %rcx
-; X64-NEXT: orq %rdx, %rcx
-; X64-NEXT: movabsq $3689348813882916864, %rax # imm = 0x3333333300000000
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rcx
-; X64-NEXT: movabsq $3689348805292982272, %rdx # imm = 0x3333333100000000
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: leaq (%rdx,%rax,4), %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: leaq (%rax,%rdx,2), %rax
+; X64-NEXT: shlq $33, %rax
; X64-NEXT: retq
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = lshr i64 %1, 33
@@ -218,86 +77,18 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
ret i64 %3
}
-; TODO: fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
+; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl $5592405, %eax # imm = 0x555555
-; X86-NEXT: shll $6, %ecx
-; X86-NEXT: andl $-1431655808, %ecx # imm = 0xAAAAAA80
-; X86-NEXT: shll $8, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $986895, %ecx # imm = 0xF0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $135204623, %eax # imm = 0x80F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $3355443, %ecx # imm = 0x333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $36909875, %eax # imm = 0x2333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: shrl $7, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %edi
-; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT: orl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: leal (%rdi,%rax,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $5592405, %ecx # imm = 0x555555
-; X64-NEXT: shll $6, %eax
-; X64-NEXT: andl $-1431655808, %eax # imm = 0xAAAAAA80
-; X64-NEXT: shll $8, %ecx
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: bswapl %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: andl $986895, %eax # imm = 0xF0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %ecx
-; X64-NEXT: andl $135204623, %ecx # imm = 0x80F0F0F
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: andl $3355443, %eax # imm = 0x333333
-; X64-NEXT: shrl $2, %ecx
-; X64-NEXT: andl $36909875, %ecx # imm = 0x2333333
-; X64-NEXT: leal (%rcx,%rax,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: leal (%rax,%rcx,2), %eax
+; X64-NEXT: shrl $7, %eax
; X64-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a0)
%c = shl i32 %b, 7
@@ -309,79 +100,14 @@ define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $357913941, %ecx # imm = 0x15555555
-; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $235867919, %ecx # imm = 0xE0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $590558003, %ecx # imm = 0x23333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse_i64:
; X64: # %bb.0:
-; X64-NEXT: bswapq %rdi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %edi
-; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT: orl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: leal (%rdi,%rax,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $357913941, %ecx # imm = 0x15555555
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: leal (%rax,%rcx,2), %eax
-; X64-NEXT: shlq $33, %rax
-; X64-NEXT: bswapq %rax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $235867919, %ecx # imm = 0xE0F0F0F
-; X64-NEXT: shll $4, %ecx
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $590558003, %ecx # imm = 0x23333333
-; X64-NEXT: shrl $2, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: leal (%rax,%rcx,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $357913941, %ecx # imm = 0x15555555
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: leal (%rax,%rcx,2), %eax
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $33, %rax
; X64-NEXT: retq
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = shl i64 %1, 33