[llvm] d8f929a - [RISCV] Custom legalize BITREVERSE with Zbkb.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 23:30:42 PST 2022
Author: Craig Topper
Date: 2022-01-28T23:11:12-08:00
New Revision: d8f929a567083a6b90264193f1e4476f6b77c5fe
URL: https://github.com/llvm/llvm-project/commit/d8f929a567083a6b90264193f1e4476f6b77c5fe
DIFF: https://github.com/llvm/llvm-project/commit/d8f929a567083a6b90264193f1e4476f6b77c5fe.diff
LOG: [RISCV] Custom legalize BITREVERSE with Zbkb.
With Zbkb, a bitreverse can be split into a rev8 and a brev8.
Reviewed By: VincentWu
Differential Revision: https://reviews.llvm.org/D118430
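As a quick sanity check on the identity this lowering relies on, here is a small standalone C++ model (an illustrative sketch only, not part of the patch): a byte swap (rev8) followed by reversing the bits within each byte (brev8) is the same as a full bit reversal.

// Standalone model of the Zbkb lowering; not LLVM code.
#include <cassert>
#include <cstdint>

// rev8: reverse the order of the bytes (a plain byte swap).
static uint64_t rev8(uint64_t X) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((X >> (8 * I)) & 0xff) << (8 * (7 - I));
  return R;
}

// brev8: reverse the bits within each byte, keeping the bytes in place.
static uint64_t brev8(uint64_t X) {
  uint64_t R = 0;
  for (int B = 0; B < 8; ++B) {
    uint64_t Byte = (X >> (8 * B)) & 0xff;
    uint64_t Rev = 0;
    for (int I = 0; I < 8; ++I)
      Rev |= ((Byte >> I) & 1) << (7 - I);
    R |= Rev << (8 * B);
  }
  return R;
}

// Reference full bit reversal.
static uint64_t bitreverse(uint64_t X) {
  uint64_t R = 0;
  for (int I = 0; I < 64; ++I)
    R |= ((X >> I) & 1) << (63 - I);
  return R;
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  assert(bitreverse(X) == brev8(rev8(X)));
  return 0;
}

On RV64 with Zbkb that is exactly the two-instruction rev8+brev8 sequence the new lowering emits; the updated tests below show that the narrower types only need an extra srli to shift the result back down.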
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8d5d69420d5f4..9d1bd7b4b5e64 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -282,6 +282,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
? Legal
: Expand);
+ // Zbkb can use rev8+brev8 to implement bitreverse.
+ setOperationAction(ISD::BITREVERSE, XLenVT,
+ Subtarget.hasStdExtZbkb() ? Custom : Expand);
}
if (Subtarget.hasStdExtZbb()) {
@@ -2955,17 +2958,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
- // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
- assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- // Start with the maximum immediate value which is the bitwidth - 1.
- unsigned Imm = VT.getSizeInBits() - 1;
- // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
- if (Op.getOpcode() == ISD::BSWAP)
- Imm &= ~0x7U;
- return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
- DAG.getConstant(Imm, DL, VT));
+ if (Subtarget.hasStdExtZbp()) {
+ // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+ // Start with the maximum immediate value which is the bitwidth - 1.
+ unsigned Imm = VT.getSizeInBits() - 1;
+ // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+ if (Op.getOpcode() == ISD::BSWAP)
+ Imm &= ~0x7U;
+ return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Imm, DL, VT));
+ }
+ assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
+ assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
+ // Expand bitreverse to a bswap(rev8) followed by brev8.
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
+ return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
}
case ISD::FSHL:
case ISD::FSHR: {
@@ -10097,6 +10106,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCVT_W_RV64)
NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
+ NODE_NAME_CASE(BREV8)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
NODE_NAME_CASE(GORC)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 840a821870a79..74988a0cf6c40 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -99,6 +99,8 @@ enum NodeType : unsigned {
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,
+ // Reverse bits in each byte.
+ BREV8,
// Generalized Reverse and Generalized Or-Combine - directly matching the
// semantics of the named RISC-V instructions. Lowered as custom nodes as
// TableGen chokes when faced with commutative permutations in deeply-nested
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 99c1309b88341..bf1f493683ce3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -43,6 +43,7 @@ def riscv_fslw : SDNode<"RISCVISD::FSLW", SDT_RISCVIntShiftDOpW>;
def riscv_fsrw : SDNode<"RISCVISD::FSRW", SDT_RISCVIntShiftDOpW>;
def riscv_fsl : SDNode<"RISCVISD::FSL", SDTIntShiftDOp>;
def riscv_fsr : SDNode<"RISCVISD::FSR", SDTIntShiftDOp>;
+def riscv_brev8 : SDNode<"RISCVISD::BREV8", SDTIntUnaryOp>;
def riscv_grev : SDNode<"RISCVISD::GREV", SDTIntBinOp>;
def riscv_grevw : SDNode<"RISCVISD::GREVW", SDT_RISCVIntBinOpW>;
def riscv_gorc : SDNode<"RISCVISD::GORC", SDTIntBinOp>;
@@ -1190,6 +1191,7 @@ let Predicates = [HasStdExtZbf, IsRV64] in
def : PatGprGpr<riscv_bfpw, BFPW>;
let Predicates = [HasStdExtZbkb] in {
+def : PatGpr<riscv_brev8, BREV8>;
def : PatGpr<int_riscv_brev8, BREV8>;
} // Predicates = [HasStdExtZbkb]
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index 87efa43523517..d442602ef5ab1 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -4,13 +4,13 @@
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV32ZB
+; RUN: | FileCheck %s -check-prefixes=RV32ZB,RV32ZBB
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV64ZB
+; RUN: | FileCheck %s -check-prefixes=RV64ZB,RV64ZBB
; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV32ZB
+; RUN: | FileCheck %s -check-prefixes=RV32ZB,RV32ZBKB
; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV64ZB
+; RUN: | FileCheck %s -check-prefixes=RV64ZB,RV64ZBKB
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
@@ -212,43 +212,57 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_i8:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: andi a1, a0, 15
-; RV32ZB-NEXT: slli a1, a1, 4
-; RV32ZB-NEXT: slli a0, a0, 24
-; RV32ZB-NEXT: srli a0, a0, 28
-; RV32ZB-NEXT: or a0, a0, a1
-; RV32ZB-NEXT: andi a1, a0, 51
-; RV32ZB-NEXT: slli a1, a1, 2
-; RV32ZB-NEXT: srli a0, a0, 2
-; RV32ZB-NEXT: andi a0, a0, 51
-; RV32ZB-NEXT: or a0, a0, a1
-; RV32ZB-NEXT: andi a1, a0, 85
-; RV32ZB-NEXT: slli a1, a1, 1
-; RV32ZB-NEXT: srli a0, a0, 1
-; RV32ZB-NEXT: andi a0, a0, 85
-; RV32ZB-NEXT: or a0, a0, a1
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a1, a0, 15
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: slli a0, a0, 24
+; RV32ZBB-NEXT: srli a0, a0, 28
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: andi a1, a0, 51
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: srli a0, a0, 2
+; RV32ZBB-NEXT: andi a0, a0, 51
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: andi a1, a0, 85
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: srli a0, a0, 1
+; RV32ZBB-NEXT: andi a0, a0, 85
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_i8:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: andi a1, a0, 15
-; RV64ZB-NEXT: slli a1, a1, 4
-; RV64ZB-NEXT: slli a0, a0, 56
-; RV64ZB-NEXT: srli a0, a0, 60
-; RV64ZB-NEXT: or a0, a0, a1
-; RV64ZB-NEXT: andi a1, a0, 51
-; RV64ZB-NEXT: slli a1, a1, 2
-; RV64ZB-NEXT: srli a0, a0, 2
-; RV64ZB-NEXT: andi a0, a0, 51
-; RV64ZB-NEXT: or a0, a0, a1
-; RV64ZB-NEXT: andi a1, a0, 85
-; RV64ZB-NEXT: slli a1, a1, 1
-; RV64ZB-NEXT: srli a0, a0, 1
-; RV64ZB-NEXT: andi a0, a0, 85
-; RV64ZB-NEXT: or a0, a0, a1
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_i8:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: andi a1, a0, 15
+; RV64ZBB-NEXT: slli a1, a1, 4
+; RV64ZBB-NEXT: slli a0, a0, 56
+; RV64ZBB-NEXT: srli a0, a0, 60
+; RV64ZBB-NEXT: or a0, a0, a1
+; RV64ZBB-NEXT: andi a1, a0, 51
+; RV64ZBB-NEXT: slli a1, a1, 2
+; RV64ZBB-NEXT: srli a0, a0, 2
+; RV64ZBB-NEXT: andi a0, a0, 51
+; RV64ZBB-NEXT: or a0, a0, a1
+; RV64ZBB-NEXT: andi a1, a0, 85
+; RV64ZBB-NEXT: slli a1, a1, 1
+; RV64ZBB-NEXT: srli a0, a0, 1
+; RV64ZBB-NEXT: andi a0, a0, 85
+; RV64ZBB-NEXT: or a0, a0, a1
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i8:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 24
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i8:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 56
+; RV64ZBKB-NEXT: ret
%tmp = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %tmp
}
@@ -312,57 +326,71 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_i16:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: rev8 a0, a0
-; RV32ZB-NEXT: srli a1, a0, 12
-; RV32ZB-NEXT: lui a2, 15
-; RV32ZB-NEXT: addi a2, a2, 240
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: srli a0, a0, 20
-; RV32ZB-NEXT: andi a0, a0, -241
-; RV32ZB-NEXT: or a0, a0, a1
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 3
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 5
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: srli a1, a0, 12
+; RV32ZBB-NEXT: lui a2, 15
+; RV32ZBB-NEXT: addi a2, a2, 240
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: srli a0, a0, 20
+; RV32ZBB-NEXT: andi a0, a0, -241
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 3
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 5
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_i16:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: rev8 a0, a0
-; RV64ZB-NEXT: srli a1, a0, 44
-; RV64ZB-NEXT: lui a2, 15
-; RV64ZB-NEXT: addiw a2, a2, 240
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: srli a0, a0, 52
-; RV64ZB-NEXT: andi a0, a0, -241
-; RV64ZB-NEXT: or a0, a0, a1
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 3
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 5
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: rev8 a0, a0
+; RV64ZBB-NEXT: srli a1, a0, 44
+; RV64ZBB-NEXT: lui a2, 15
+; RV64ZBB-NEXT: addiw a2, a2, 240
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: srli a0, a0, 52
+; RV64ZBB-NEXT: andi a0, a0, -241
+; RV64ZBB-NEXT: or a0, a0, a1
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 3
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 5
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i16:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i16:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: ret
%tmp = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %tmp
}
@@ -442,60 +470,73 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_i32:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: rev8 a0, a0
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: lui a2, 61681
-; RV32ZB-NEXT: addi a2, a2, -241
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 209715
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 349525
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: lui a2, 61681
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 209715
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 349525
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_i32:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: rev8 a0, a0
-; RV64ZB-NEXT: srli a1, a0, 36
-; RV64ZB-NEXT: lui a2, 61681
-; RV64ZB-NEXT: addiw a2, a2, -241
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: srli a0, a0, 28
-; RV64ZB-NEXT: lui a2, 986895
-; RV64ZB-NEXT: addiw a2, a2, 240
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: sext.w a0, a0
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 209715
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 349525
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: rev8 a0, a0
+; RV64ZBB-NEXT: srli a1, a0, 36
+; RV64ZBB-NEXT: lui a2, 61681
+; RV64ZBB-NEXT: addiw a2, a2, -241
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: srli a0, a0, 28
+; RV64ZBB-NEXT: lui a2, 986895
+; RV64ZBB-NEXT: addiw a2, a2, 240
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 209715
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 349525
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i32:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i32:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: ret
%tmp = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %tmp
}
@@ -616,78 +657,94 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_i64:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: rev8 a1, a1
-; RV32ZB-NEXT: srli a2, a1, 4
-; RV32ZB-NEXT: lui a3, 61681
-; RV32ZB-NEXT: addi a3, a3, -241
-; RV32ZB-NEXT: and a2, a2, a3
-; RV32ZB-NEXT: and a1, a1, a3
-; RV32ZB-NEXT: slli a1, a1, 4
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 2
-; RV32ZB-NEXT: lui a4, 209715
-; RV32ZB-NEXT: addi a4, a4, 819
-; RV32ZB-NEXT: and a2, a2, a4
-; RV32ZB-NEXT: and a1, a1, a4
-; RV32ZB-NEXT: slli a1, a1, 2
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 1
-; RV32ZB-NEXT: lui a5, 349525
-; RV32ZB-NEXT: addi a5, a5, 1365
-; RV32ZB-NEXT: and a2, a2, a5
-; RV32ZB-NEXT: and a1, a1, a5
-; RV32ZB-NEXT: slli a1, a1, 1
-; RV32ZB-NEXT: or a2, a2, a1
-; RV32ZB-NEXT: rev8 a0, a0
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: and a1, a1, a3
-; RV32ZB-NEXT: and a0, a0, a3
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: and a1, a1, a4
-; RV32ZB-NEXT: and a0, a0, a4
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: and a1, a1, a5
-; RV32ZB-NEXT: and a0, a0, a5
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a1, a1, a0
-; RV32ZB-NEXT: mv a0, a2
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a1, a1
+; RV32ZBB-NEXT: srli a2, a1, 4
+; RV32ZBB-NEXT: lui a3, 61681
+; RV32ZBB-NEXT: addi a3, a3, -241
+; RV32ZBB-NEXT: and a2, a2, a3
+; RV32ZBB-NEXT: and a1, a1, a3
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 2
+; RV32ZBB-NEXT: lui a4, 209715
+; RV32ZBB-NEXT: addi a4, a4, 819
+; RV32ZBB-NEXT: and a2, a2, a4
+; RV32ZBB-NEXT: and a1, a1, a4
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 1
+; RV32ZBB-NEXT: lui a5, 349525
+; RV32ZBB-NEXT: addi a5, a5, 1365
+; RV32ZBB-NEXT: and a2, a2, a5
+; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: or a2, a2, a1
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: and a1, a1, a3
+; RV32ZBB-NEXT: and a0, a0, a3
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: and a1, a1, a4
+; RV32ZBB-NEXT: and a0, a0, a4
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: and a0, a0, a5
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a1, a1, a0
+; RV32ZBB-NEXT: mv a0, a2
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_i64:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: lui a1, %hi(.LCPI6_0)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI6_0)(a1)
-; RV64ZB-NEXT: rev8 a0, a0
-; RV64ZB-NEXT: srli a2, a0, 4
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI6_1)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI6_1)(a1)
-; RV64ZB-NEXT: slli a0, a0, 4
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 2
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI6_2)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI6_2)(a1)
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 1
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_0)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_0)(a1)
+; RV64ZBB-NEXT: rev8 a0, a0
+; RV64ZBB-NEXT: srli a2, a0, 4
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_1)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_1)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 4
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 2
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_2)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_2)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 1
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i64:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a1, a1
+; RV32ZBKB-NEXT: brev8 a2, a1
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a1, a0
+; RV32ZBKB-NEXT: mv a0, a2
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i64:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: ret
%tmp = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %tmp
}
; FIXME: Merge away the two rev8s in the Zbkb code.
define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
; RV32I-LABEL: test_bswap_bitreverse_i16:
; RV32I: # %bb.0:
@@ -739,60 +796,79 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bswap_bitreverse_i16:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: lui a2, 1
-; RV32ZB-NEXT: addi a2, a2, -241
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 3
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 5
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: lui a2, 1
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 3
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 5
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bswap_bitreverse_i16:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: srli a1, a0, 4
-; RV64ZB-NEXT: lui a2, 1
-; RV64ZB-NEXT: addiw a2, a2, -241
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 4
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 3
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 5
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: srli a1, a0, 4
+; RV64ZBB-NEXT: lui a2, 1
+; RV64ZBB-NEXT: addiw a2, a2, -241
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 4
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 3
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 5
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i16:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i16:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: ret
%tmp = call i16 @llvm.bswap.i16(i16 %a)
%tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp)
ret i16 %tmp2
}
; FIXME: Merge away the two rev8s in the Zbkb code.
define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
; RV32I-LABEL: test_bswap_bitreverse_i32:
; RV32I: # %bb.0:
@@ -844,55 +920,69 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bswap_bitreverse_i32:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: lui a2, 61681
-; RV32ZB-NEXT: addi a2, a2, -241
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 209715
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 349525
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: lui a2, 61681
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 209715
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 349525
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bswap_bitreverse_i32:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: srli a1, a0, 4
-; RV64ZB-NEXT: lui a2, 61681
-; RV64ZB-NEXT: addiw a2, a2, -241
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 4
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 209715
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 349525
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: srli a1, a0, 4
+; RV64ZBB-NEXT: lui a2, 61681
+; RV64ZBB-NEXT: addiw a2, a2, -241
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 4
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 209715
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 349525
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i32:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i32:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: ret
%tmp = call i32 @llvm.bswap.i32(i32 %a)
%tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp)
ret i32 %tmp2
@@ -964,75 +1054,87 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bswap_bitreverse_i64:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a2, a0, 4
-; RV32ZB-NEXT: lui a3, 61681
-; RV32ZB-NEXT: addi a3, a3, -241
-; RV32ZB-NEXT: and a2, a2, a3
-; RV32ZB-NEXT: and a0, a0, a3
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a0, 2
-; RV32ZB-NEXT: lui a4, 209715
-; RV32ZB-NEXT: addi a4, a4, 819
-; RV32ZB-NEXT: and a2, a2, a4
-; RV32ZB-NEXT: and a0, a0, a4
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a0, 1
-; RV32ZB-NEXT: lui a5, 349525
-; RV32ZB-NEXT: addi a5, a5, 1365
-; RV32ZB-NEXT: and a2, a2, a5
-; RV32ZB-NEXT: and a0, a0, a5
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a1, 4
-; RV32ZB-NEXT: and a2, a2, a3
-; RV32ZB-NEXT: and a1, a1, a3
-; RV32ZB-NEXT: slli a1, a1, 4
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 2
-; RV32ZB-NEXT: and a2, a2, a4
-; RV32ZB-NEXT: and a1, a1, a4
-; RV32ZB-NEXT: slli a1, a1, 2
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 1
-; RV32ZB-NEXT: and a2, a2, a5
-; RV32ZB-NEXT: and a1, a1, a5
-; RV32ZB-NEXT: slli a1, a1, 1
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a2, a0, 4
+; RV32ZBB-NEXT: lui a3, 61681
+; RV32ZBB-NEXT: addi a3, a3, -241
+; RV32ZBB-NEXT: and a2, a2, a3
+; RV32ZBB-NEXT: and a0, a0, a3
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a0, 2
+; RV32ZBB-NEXT: lui a4, 209715
+; RV32ZBB-NEXT: addi a4, a4, 819
+; RV32ZBB-NEXT: and a2, a2, a4
+; RV32ZBB-NEXT: and a0, a0, a4
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a0, 1
+; RV32ZBB-NEXT: lui a5, 349525
+; RV32ZBB-NEXT: addi a5, a5, 1365
+; RV32ZBB-NEXT: and a2, a2, a5
+; RV32ZBB-NEXT: and a0, a0, a5
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a1, 4
+; RV32ZBB-NEXT: and a2, a2, a3
+; RV32ZBB-NEXT: and a1, a1, a3
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 2
+; RV32ZBB-NEXT: and a2, a2, a4
+; RV32ZBB-NEXT: and a1, a1, a4
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 1
+; RV32ZBB-NEXT: and a2, a2, a5
+; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bswap_bitreverse_i64:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: lui a1, %hi(.LCPI9_0)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI9_0)(a1)
-; RV64ZB-NEXT: srli a2, a0, 4
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI9_1)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI9_1)(a1)
-; RV64ZB-NEXT: slli a0, a0, 4
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 2
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI9_2)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI9_2)(a1)
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 1
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_0)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_0)(a1)
+; RV64ZBB-NEXT: srli a2, a0, 4
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_1)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_1)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 4
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 2
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_2)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_2)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 1
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i64:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a1, a1
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i64:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: ret
%tmp = call i64 @llvm.bswap.i64(i64 %a)
%tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp)
ret i64 %tmp2
}
; FIXME: Merge away the two rev8s in the Zbkb code.
define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
; RV32I-LABEL: test_bitreverse_bswap_i16:
; RV32I: # %bb.0:
@@ -1084,60 +1186,79 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_bswap_i16:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: lui a2, 1
-; RV32ZB-NEXT: addi a2, a2, -241
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 3
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 5
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: lui a2, 1
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 3
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 5
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_bswap_i16:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: srli a1, a0, 4
-; RV64ZB-NEXT: lui a2, 1
-; RV64ZB-NEXT: addiw a2, a2, -241
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 4
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 3
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 5
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: srli a1, a0, 4
+; RV64ZBB-NEXT: lui a2, 1
+; RV64ZBB-NEXT: addiw a2, a2, -241
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 4
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 3
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 5
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i16:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: rev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i16:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: ret
%tmp = call i16 @llvm.bitreverse.i16(i16 %a)
%tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
ret i16 %tmp2
}
; FIXME: Merge away the two rev8s in the Zbkb code.
define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
; RV32I-LABEL: test_bitreverse_bswap_i32:
; RV32I: # %bb.0:
@@ -1189,55 +1310,69 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_bswap_i32:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a1, a0, 4
-; RV32ZB-NEXT: lui a2, 61681
-; RV32ZB-NEXT: addi a2, a2, -241
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 2
-; RV32ZB-NEXT: lui a2, 209715
-; RV32ZB-NEXT: addi a2, a2, 819
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: srli a1, a0, 1
-; RV32ZB-NEXT: lui a2, 349525
-; RV32ZB-NEXT: addi a2, a2, 1365
-; RV32ZB-NEXT: and a1, a1, a2
-; RV32ZB-NEXT: and a0, a0, a2
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a1, a0
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a1, a0, 4
+; RV32ZBB-NEXT: lui a2, 61681
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: lui a2, 209715
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: lui a2, 349525
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_bswap_i32:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: srli a1, a0, 4
-; RV64ZB-NEXT: lui a2, 61681
-; RV64ZB-NEXT: addiw a2, a2, -241
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 4
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 2
-; RV64ZB-NEXT: lui a2, 209715
-; RV64ZB-NEXT: addiw a2, a2, 819
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 2
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: srli a1, a0, 1
-; RV64ZB-NEXT: lui a2, 349525
-; RV64ZB-NEXT: addiw a2, a2, 1365
-; RV64ZB-NEXT: and a1, a1, a2
-; RV64ZB-NEXT: and a0, a0, a2
-; RV64ZB-NEXT: slliw a0, a0, 1
-; RV64ZB-NEXT: or a0, a1, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: srli a1, a0, 4
+; RV64ZBB-NEXT: lui a2, 61681
+; RV64ZBB-NEXT: addiw a2, a2, -241
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 4
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 2
+; RV64ZBB-NEXT: lui a2, 209715
+; RV64ZBB-NEXT: addiw a2, a2, 819
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 2
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: srli a1, a0, 1
+; RV64ZBB-NEXT: lui a2, 349525
+; RV64ZBB-NEXT: addiw a2, a2, 1365
+; RV64ZBB-NEXT: and a1, a1, a2
+; RV64ZBB-NEXT: and a0, a0, a2
+; RV64ZBB-NEXT: slliw a0, a0, 1
+; RV64ZBB-NEXT: or a0, a1, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i32:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i32:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: rev8 a0, a0
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: ret
%tmp = call i32 @llvm.bitreverse.i32(i32 %a)
%tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
ret i32 %tmp2
@@ -1309,70 +1444,81 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
;
-; RV32ZB-LABEL: test_bitreverse_bswap_i64:
-; RV32ZB: # %bb.0:
-; RV32ZB-NEXT: srli a2, a0, 4
-; RV32ZB-NEXT: lui a3, 61681
-; RV32ZB-NEXT: addi a3, a3, -241
-; RV32ZB-NEXT: and a2, a2, a3
-; RV32ZB-NEXT: and a0, a0, a3
-; RV32ZB-NEXT: slli a0, a0, 4
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a0, 2
-; RV32ZB-NEXT: lui a4, 209715
-; RV32ZB-NEXT: addi a4, a4, 819
-; RV32ZB-NEXT: and a2, a2, a4
-; RV32ZB-NEXT: and a0, a0, a4
-; RV32ZB-NEXT: slli a0, a0, 2
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a0, 1
-; RV32ZB-NEXT: lui a5, 349525
-; RV32ZB-NEXT: addi a5, a5, 1365
-; RV32ZB-NEXT: and a2, a2, a5
-; RV32ZB-NEXT: and a0, a0, a5
-; RV32ZB-NEXT: slli a0, a0, 1
-; RV32ZB-NEXT: or a0, a2, a0
-; RV32ZB-NEXT: srli a2, a1, 4
-; RV32ZB-NEXT: and a2, a2, a3
-; RV32ZB-NEXT: and a1, a1, a3
-; RV32ZB-NEXT: slli a1, a1, 4
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 2
-; RV32ZB-NEXT: and a2, a2, a4
-; RV32ZB-NEXT: and a1, a1, a4
-; RV32ZB-NEXT: slli a1, a1, 2
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: srli a2, a1, 1
-; RV32ZB-NEXT: and a2, a2, a5
-; RV32ZB-NEXT: and a1, a1, a5
-; RV32ZB-NEXT: slli a1, a1, 1
-; RV32ZB-NEXT: or a1, a2, a1
-; RV32ZB-NEXT: ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: srli a2, a0, 4
+; RV32ZBB-NEXT: lui a3, 61681
+; RV32ZBB-NEXT: addi a3, a3, -241
+; RV32ZBB-NEXT: and a2, a2, a3
+; RV32ZBB-NEXT: and a0, a0, a3
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a0, 2
+; RV32ZBB-NEXT: lui a4, 209715
+; RV32ZBB-NEXT: addi a4, a4, 819
+; RV32ZBB-NEXT: and a2, a2, a4
+; RV32ZBB-NEXT: and a0, a0, a4
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a0, 1
+; RV32ZBB-NEXT: lui a5, 349525
+; RV32ZBB-NEXT: addi a5, a5, 1365
+; RV32ZBB-NEXT: and a2, a2, a5
+; RV32ZBB-NEXT: and a0, a0, a5
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a1, 4
+; RV32ZBB-NEXT: and a2, a2, a3
+; RV32ZBB-NEXT: and a1, a1, a3
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 2
+; RV32ZBB-NEXT: and a2, a2, a4
+; RV32ZBB-NEXT: and a1, a1, a4
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: srli a2, a1, 1
+; RV32ZBB-NEXT: and a2, a2, a5
+; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: or a1, a2, a1
+; RV32ZBB-NEXT: ret
;
-; RV64ZB-LABEL: test_bitreverse_bswap_i64:
-; RV64ZB: # %bb.0:
-; RV64ZB-NEXT: lui a1, %hi(.LCPI12_0)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI12_0)(a1)
-; RV64ZB-NEXT: srli a2, a0, 4
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI12_1)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI12_1)(a1)
-; RV64ZB-NEXT: slli a0, a0, 4
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 2
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: lui a1, %hi(.LCPI12_2)
-; RV64ZB-NEXT: ld a1, %lo(.LCPI12_2)(a1)
-; RV64ZB-NEXT: slli a0, a0, 2
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: srli a2, a0, 1
-; RV64ZB-NEXT: and a2, a2, a1
-; RV64ZB-NEXT: and a0, a0, a1
-; RV64ZB-NEXT: slli a0, a0, 1
-; RV64ZB-NEXT: or a0, a2, a0
-; RV64ZB-NEXT: ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_0)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_0)(a1)
+; RV64ZBB-NEXT: srli a2, a0, 4
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_1)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_1)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 4
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 2
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_2)
+; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_2)(a1)
+; RV64ZBB-NEXT: slli a0, a0, 2
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: srli a2, a0, 1
+; RV64ZBB-NEXT: and a2, a2, a1
+; RV64ZBB-NEXT: and a0, a0, a1
+; RV64ZBB-NEXT: slli a0, a0, 1
+; RV64ZBB-NEXT: or a0, a2, a0
+; RV64ZBB-NEXT: ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i64:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: brev8 a1, a1
+; RV32ZBKB-NEXT: ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i64:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: ret
%tmp = call i64 @llvm.bitreverse.i64(i64 %a)
%tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
ret i64 %tmp2