[llvm] d8f929a - [RISCV] Custom legalize BITREVERSE with Zbkb.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Fri Jan 28 23:30:42 PST 2022


Author: Craig Topper
Date: 2022-01-28T23:11:12-08:00
New Revision: d8f929a567083a6b90264193f1e4476f6b77c5fe

URL: https://github.com/llvm/llvm-project/commit/d8f929a567083a6b90264193f1e4476f6b77c5fe
DIFF: https://github.com/llvm/llvm-project/commit/d8f929a567083a6b90264193f1e4476f6b77c5fe.diff

LOG: [RISCV] Custom legalize BITREVERSE with Zbkb.

With Zbkb, a bitreverse can be split into a rev8 and a brev8.
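
A minimal sketch (not part of this commit) of why the decomposition is
correct: if rev8 is modelled as a plain byte swap and brev8 as a bit
reversal within each byte, their composition reverses every bit of the
register, which is what the new lowering relies on. The helper names
below are illustrative only.

  #include <cassert>
  #include <cstdint>

  // Reference: reverse all 32 bits of X.
  static uint32_t bitreverse32(uint32_t X) {
    uint32_t R = 0;
    for (int I = 0; I < 32; ++I)
      R |= ((X >> I) & 1u) << (31 - I);
    return R;
  }

  // Model of rev8: reverse the byte order of the register
  // (GCC/Clang byte-swap builtin).
  static uint32_t rev8(uint32_t X) { return __builtin_bswap32(X); }

  // Model of brev8: reverse the bit order within each byte; the bytes
  // themselves stay in place.
  static uint32_t brev8(uint32_t X) {
    uint32_t R = 0;
    for (int B = 0; B < 4; ++B) {
      uint32_t Byte = (X >> (8 * B)) & 0xFF;
      uint32_t Rev = 0;
      for (int I = 0; I < 8; ++I)
        Rev |= ((Byte >> I) & 1u) << (7 - I);
      R |= Rev << (8 * B);
    }
    return R;
  }

  int main() {
    // brev8(rev8(x)) == bitreverse(x) for every input tried here.
    for (uint32_t X : {0x00000000u, 0x00000001u, 0x12345678u, 0xdeadbeefu})
      assert(brev8(rev8(X)) == bitreverse32(X));
    return 0;
  }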

Reviewed By: VincentWu

Differential Revision: https://reviews.llvm.org/D118430

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
    llvm/test/CodeGen/RISCV/bswap-bitreverse.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8d5d69420d5f4..9d1bd7b4b5e64 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -282,6 +282,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                        (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
                            ? Legal
                            : Expand);
+    // Zbkb can use rev8+brev8 to implement bitreverse.
+    setOperationAction(ISD::BITREVERSE, XLenVT,
+                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
   }
 
   if (Subtarget.hasStdExtZbb()) {
@@ -2955,17 +2958,23 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return LowerINTRINSIC_VOID(Op, DAG);
   case ISD::BSWAP:
   case ISD::BITREVERSE: {
-    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
-    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
     MVT VT = Op.getSimpleValueType();
     SDLoc DL(Op);
-    // Start with the maximum immediate value which is the bitwidth - 1.
-    unsigned Imm = VT.getSizeInBits() - 1;
-    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
-    if (Op.getOpcode() == ISD::BSWAP)
-      Imm &= ~0x7U;
-    return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
-                       DAG.getConstant(Imm, DL, VT));
+    if (Subtarget.hasStdExtZbp()) {
+      // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+      // Start with the maximum immediate value which is the bitwidth - 1.
+      unsigned Imm = VT.getSizeInBits() - 1;
+      // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+      if (Op.getOpcode() == ISD::BSWAP)
+        Imm &= ~0x7U;
+      return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
+                         DAG.getConstant(Imm, DL, VT));
+    }
+    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
+    assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
+    // Expand bitreverse to a bswap(rev8) followed by brev8.
+    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
+    return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
   }
   case ISD::FSHL:
   case ISD::FSHR: {
@@ -10097,6 +10106,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(STRICT_FCVT_W_RV64)
   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
+  NODE_NAME_CASE(BREV8)
   NODE_NAME_CASE(GREV)
   NODE_NAME_CASE(GREVW)
   NODE_NAME_CASE(GORC)

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 840a821870a79..74988a0cf6c40 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -99,6 +99,8 @@ enum NodeType : unsigned {
   // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
   // (returns (Lo, Hi)). It takes a chain operand.
   READ_CYCLE_WIDE,
+  // Reverse bits in each byte.
+  BREV8,
   // Generalized Reverse and Generalized Or-Combine - directly matching the
   // semantics of the named RISC-V instructions. Lowered as custom nodes as
   // TableGen chokes when faced with commutative permutations in deeply-nested

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 99c1309b88341..bf1f493683ce3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -43,6 +43,7 @@ def riscv_fslw   : SDNode<"RISCVISD::FSLW",   SDT_RISCVIntShiftDOpW>;
 def riscv_fsrw   : SDNode<"RISCVISD::FSRW",   SDT_RISCVIntShiftDOpW>;
 def riscv_fsl    : SDNode<"RISCVISD::FSL",    SDTIntShiftDOp>;
 def riscv_fsr    : SDNode<"RISCVISD::FSR",    SDTIntShiftDOp>;
+def riscv_brev8  : SDNode<"RISCVISD::BREV8",  SDTIntUnaryOp>;
 def riscv_grev   : SDNode<"RISCVISD::GREV",   SDTIntBinOp>;
 def riscv_grevw  : SDNode<"RISCVISD::GREVW",  SDT_RISCVIntBinOpW>;
 def riscv_gorc   : SDNode<"RISCVISD::GORC",   SDTIntBinOp>;
@@ -1190,6 +1191,7 @@ let Predicates = [HasStdExtZbf, IsRV64] in
 def : PatGprGpr<riscv_bfpw, BFPW>;
 
 let Predicates = [HasStdExtZbkb] in {
+def : PatGpr<riscv_brev8, BREV8>;
 def : PatGpr<int_riscv_brev8, BREV8>;
 } // Predicates = [HasStdExtZbkb]
 

diff  --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index 87efa43523517..d442602ef5ab1 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -4,13 +4,13 @@
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV32ZB
+; RUN:   | FileCheck %s -check-prefixes=RV32ZB,RV32ZBB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV64ZB
+; RUN:   | FileCheck %s -check-prefixes=RV64ZB,RV64ZBB
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV32ZB
+; RUN:   | FileCheck %s -check-prefixes=RV32ZB,RV32ZBKB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV64ZB
+; RUN:   | FileCheck %s -check-prefixes=RV64ZB,RV64ZBKB
 
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
@@ -212,43 +212,57 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_i8:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a1, a0, 15
-; RV32ZB-NEXT:    slli a1, a1, 4
-; RV32ZB-NEXT:    slli a0, a0, 24
-; RV32ZB-NEXT:    srli a0, a0, 28
-; RV32ZB-NEXT:    or a0, a0, a1
-; RV32ZB-NEXT:    andi a1, a0, 51
-; RV32ZB-NEXT:    slli a1, a1, 2
-; RV32ZB-NEXT:    srli a0, a0, 2
-; RV32ZB-NEXT:    andi a0, a0, 51
-; RV32ZB-NEXT:    or a0, a0, a1
-; RV32ZB-NEXT:    andi a1, a0, 85
-; RV32ZB-NEXT:    slli a1, a1, 1
-; RV32ZB-NEXT:    srli a0, a0, 1
-; RV32ZB-NEXT:    andi a0, a0, 85
-; RV32ZB-NEXT:    or a0, a0, a1
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    andi a1, a0, 15
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    slli a0, a0, 24
+; RV32ZBB-NEXT:    srli a0, a0, 28
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    andi a1, a0, 51
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    srli a0, a0, 2
+; RV32ZBB-NEXT:    andi a0, a0, 51
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    andi a1, a0, 85
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    srli a0, a0, 1
+; RV32ZBB-NEXT:    andi a0, a0, 85
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_i8:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a1, a0, 15
-; RV64ZB-NEXT:    slli a1, a1, 4
-; RV64ZB-NEXT:    slli a0, a0, 56
-; RV64ZB-NEXT:    srli a0, a0, 60
-; RV64ZB-NEXT:    or a0, a0, a1
-; RV64ZB-NEXT:    andi a1, a0, 51
-; RV64ZB-NEXT:    slli a1, a1, 2
-; RV64ZB-NEXT:    srli a0, a0, 2
-; RV64ZB-NEXT:    andi a0, a0, 51
-; RV64ZB-NEXT:    or a0, a0, a1
-; RV64ZB-NEXT:    andi a1, a0, 85
-; RV64ZB-NEXT:    slli a1, a1, 1
-; RV64ZB-NEXT:    srli a0, a0, 1
-; RV64ZB-NEXT:    andi a0, a0, 85
-; RV64ZB-NEXT:    or a0, a0, a1
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    andi a1, a0, 15
+; RV64ZBB-NEXT:    slli a1, a1, 4
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srli a0, a0, 60
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    andi a1, a0, 51
+; RV64ZBB-NEXT:    slli a1, a1, 2
+; RV64ZBB-NEXT:    srli a0, a0, 2
+; RV64ZBB-NEXT:    andi a0, a0, 51
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    andi a1, a0, 85
+; RV64ZBB-NEXT:    slli a1, a1, 1
+; RV64ZBB-NEXT:    srli a0, a0, 1
+; RV64ZBB-NEXT:    andi a0, a0, 85
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i8:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 24
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 56
+; RV64ZBKB-NEXT:    ret
   %tmp = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %tmp
 }
@@ -312,57 +326,71 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_i16:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a1, a0, 12
-; RV32ZB-NEXT:    lui a2, 15
-; RV32ZB-NEXT:    addi a2, a2, 240
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    srli a0, a0, 20
-; RV32ZB-NEXT:    andi a0, a0, -241
-; RV32ZB-NEXT:    or a0, a0, a1
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 3
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 5
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 12
+; RV32ZBB-NEXT:    lui a2, 15
+; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    srli a0, a0, 20
+; RV32ZBB-NEXT:    andi a0, a0, -241
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_i16:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a1, a0, 44
-; RV64ZB-NEXT:    lui a2, 15
-; RV64ZB-NEXT:    addiw a2, a2, 240
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    srli a0, a0, 52
-; RV64ZB-NEXT:    andi a0, a0, -241
-; RV64ZB-NEXT:    or a0, a0, a1
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 3
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 5
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 44
+; RV64ZBB-NEXT:    lui a2, 15
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 52
+; RV64ZBB-NEXT:    andi a0, a0, -241
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i16:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    ret
   %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %tmp
 }
@@ -442,60 +470,73 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_i32:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    lui a2, 61681
-; RV32ZB-NEXT:    addi a2, a2, -241
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 209715
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 349525
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_i32:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a1, a0, 36
-; RV64ZB-NEXT:    lui a2, 61681
-; RV64ZB-NEXT:    addiw a2, a2, -241
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    srli a0, a0, 28
-; RV64ZB-NEXT:    lui a2, 986895
-; RV64ZB-NEXT:    addiw a2, a2, 240
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    sext.w a0, a0
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 209715
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 349525
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 28
+; RV64ZBB-NEXT:    lui a2, 986895
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i32:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    ret
   %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %tmp
 }
@@ -616,78 +657,94 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_i64:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a1, a1
-; RV32ZB-NEXT:    srli a2, a1, 4
-; RV32ZB-NEXT:    lui a3, 61681
-; RV32ZB-NEXT:    addi a3, a3, -241
-; RV32ZB-NEXT:    and a2, a2, a3
-; RV32ZB-NEXT:    and a1, a1, a3
-; RV32ZB-NEXT:    slli a1, a1, 4
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 2
-; RV32ZB-NEXT:    lui a4, 209715
-; RV32ZB-NEXT:    addi a4, a4, 819
-; RV32ZB-NEXT:    and a2, a2, a4
-; RV32ZB-NEXT:    and a1, a1, a4
-; RV32ZB-NEXT:    slli a1, a1, 2
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 1
-; RV32ZB-NEXT:    lui a5, 349525
-; RV32ZB-NEXT:    addi a5, a5, 1365
-; RV32ZB-NEXT:    and a2, a2, a5
-; RV32ZB-NEXT:    and a1, a1, a5
-; RV32ZB-NEXT:    slli a1, a1, 1
-; RV32ZB-NEXT:    or a2, a2, a1
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    and a1, a1, a3
-; RV32ZB-NEXT:    and a0, a0, a3
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    and a1, a1, a4
-; RV32ZB-NEXT:    and a0, a0, a4
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    and a1, a1, a5
-; RV32ZB-NEXT:    and a0, a0, a5
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a1, a1, a0
-; RV32ZB-NEXT:    mv a0, a2
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a2, a2, a1
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a1, a1, a0
+; RV32ZBB-NEXT:    mv a0, a2
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_i64:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI6_0)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI6_0)(a1)
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a2, a0, 4
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI6_1)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI6_1)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 4
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 2
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI6_2)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI6_2)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 1
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_0)(a1)
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_i64:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a1, a1
+; RV32ZBKB-NEXT:    brev8 a2, a1
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a1, a0
+; RV32ZBKB-NEXT:    mv a0, a2
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
   %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
   ret i64 %tmp
 }
 
+; FIXME: Merge away the two rev8s in the Zbkb code.
 define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i16:
 ; RV32I:       # %bb.0:
@@ -739,60 +796,79 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bswap_bitreverse_i16:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    lui a2, 1
-; RV32ZB-NEXT:    addi a2, a2, -241
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 3
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 5
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bswap_bitreverse_i16:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    srli a1, a0, 4
-; RV64ZB-NEXT:    lui a2, 1
-; RV64ZB-NEXT:    addiw a2, a2, -241
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 4
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 3
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 5
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i16:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    ret
   %tmp = call i16 @llvm.bswap.i16(i16 %a)
   %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp)
   ret i16 %tmp2
 }
 
+; FIXME: Merge away the two rev8s in the Zbkb code.
 define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i32:
 ; RV32I:       # %bb.0:
@@ -844,55 +920,69 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bswap_bitreverse_i32:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    lui a2, 61681
-; RV32ZB-NEXT:    addi a2, a2, -241
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 209715
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 349525
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bswap_bitreverse_i32:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    srli a1, a0, 4
-; RV64ZB-NEXT:    lui a2, 61681
-; RV64ZB-NEXT:    addiw a2, a2, -241
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 4
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 209715
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 349525
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i32:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    ret
   %tmp = call i32 @llvm.bswap.i32(i32 %a)
   %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp)
   ret i32 %tmp2
@@ -964,75 +1054,87 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bswap_bitreverse_i64:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a2, a0, 4
-; RV32ZB-NEXT:    lui a3, 61681
-; RV32ZB-NEXT:    addi a3, a3, -241
-; RV32ZB-NEXT:    and a2, a2, a3
-; RV32ZB-NEXT:    and a0, a0, a3
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a0, 2
-; RV32ZB-NEXT:    lui a4, 209715
-; RV32ZB-NEXT:    addi a4, a4, 819
-; RV32ZB-NEXT:    and a2, a2, a4
-; RV32ZB-NEXT:    and a0, a0, a4
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a0, 1
-; RV32ZB-NEXT:    lui a5, 349525
-; RV32ZB-NEXT:    addi a5, a5, 1365
-; RV32ZB-NEXT:    and a2, a2, a5
-; RV32ZB-NEXT:    and a0, a0, a5
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a1, 4
-; RV32ZB-NEXT:    and a2, a2, a3
-; RV32ZB-NEXT:    and a1, a1, a3
-; RV32ZB-NEXT:    slli a1, a1, 4
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 2
-; RV32ZB-NEXT:    and a2, a2, a4
-; RV32ZB-NEXT:    and a1, a1, a4
-; RV32ZB-NEXT:    slli a1, a1, 2
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 1
-; RV32ZB-NEXT:    and a2, a2, a5
-; RV32ZB-NEXT:    and a1, a1, a5
-; RV32ZB-NEXT:    slli a1, a1, 1
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a2, a0, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bswap_bitreverse_i64:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI9_0)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI9_0)(a1)
-; RV64ZB-NEXT:    srli a2, a0, 4
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI9_1)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI9_1)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 4
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 2
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI9_2)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI9_2)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 1
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_0)(a1)
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bswap_bitreverse_i64:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a1, a1
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bswap_bitreverse_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
   %tmp = call i64 @llvm.bswap.i64(i64 %a)
   %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp)
   ret i64 %tmp2
 }
 
+; FIXME: Merge away the two rev8s in the Zbkb code.
 define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i16:
 ; RV32I:       # %bb.0:
@@ -1084,60 +1186,79 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_bswap_i16:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    lui a2, 1
-; RV32ZB-NEXT:    addi a2, a2, -241
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 3
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 5
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_bswap_i16:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    srli a1, a0, 4
-; RV64ZB-NEXT:    lui a2, 1
-; RV64ZB-NEXT:    addiw a2, a2, -241
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 4
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 3
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 5
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i16:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i16:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    ret
   %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
   %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
   ret i16 %tmp2
 }
 
+; FIXME: Merge away the two rev8s in the Zbkb code.
 define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
@@ -1189,55 +1310,69 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_bswap_i32:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a1, a0, 4
-; RV32ZB-NEXT:    lui a2, 61681
-; RV32ZB-NEXT:    addi a2, a2, -241
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 2
-; RV32ZB-NEXT:    lui a2, 209715
-; RV32ZB-NEXT:    addi a2, a2, 819
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    srli a1, a0, 1
-; RV32ZB-NEXT:    lui a2, 349525
-; RV32ZB-NEXT:    addi a2, a2, 1365
-; RV32ZB-NEXT:    and a1, a1, a2
-; RV32ZB-NEXT:    and a0, a0, a2
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a1, a0
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_bswap_i32:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    srli a1, a0, 4
-; RV64ZB-NEXT:    lui a2, 61681
-; RV64ZB-NEXT:    addiw a2, a2, -241
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 4
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 2
-; RV64ZB-NEXT:    lui a2, 209715
-; RV64ZB-NEXT:    addiw a2, a2, 819
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 2
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    srli a1, a0, 1
-; RV64ZB-NEXT:    lui a2, 349525
-; RV64ZB-NEXT:    addiw a2, a2, 1365
-; RV64ZB-NEXT:    and a1, a1, a2
-; RV64ZB-NEXT:    and a0, a0, a2
-; RV64ZB-NEXT:    slliw a0, a0, 1
-; RV64ZB-NEXT:    or a0, a1, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i32:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i32:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    ret
   %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
   %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
   ret i32 %tmp2
@@ -1309,70 +1444,81 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32ZB-LABEL: test_bitreverse_bswap_i64:
-; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    srli a2, a0, 4
-; RV32ZB-NEXT:    lui a3, 61681
-; RV32ZB-NEXT:    addi a3, a3, -241
-; RV32ZB-NEXT:    and a2, a2, a3
-; RV32ZB-NEXT:    and a0, a0, a3
-; RV32ZB-NEXT:    slli a0, a0, 4
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a0, 2
-; RV32ZB-NEXT:    lui a4, 209715
-; RV32ZB-NEXT:    addi a4, a4, 819
-; RV32ZB-NEXT:    and a2, a2, a4
-; RV32ZB-NEXT:    and a0, a0, a4
-; RV32ZB-NEXT:    slli a0, a0, 2
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a0, 1
-; RV32ZB-NEXT:    lui a5, 349525
-; RV32ZB-NEXT:    addi a5, a5, 1365
-; RV32ZB-NEXT:    and a2, a2, a5
-; RV32ZB-NEXT:    and a0, a0, a5
-; RV32ZB-NEXT:    slli a0, a0, 1
-; RV32ZB-NEXT:    or a0, a2, a0
-; RV32ZB-NEXT:    srli a2, a1, 4
-; RV32ZB-NEXT:    and a2, a2, a3
-; RV32ZB-NEXT:    and a1, a1, a3
-; RV32ZB-NEXT:    slli a1, a1, 4
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 2
-; RV32ZB-NEXT:    and a2, a2, a4
-; RV32ZB-NEXT:    and a1, a1, a4
-; RV32ZB-NEXT:    slli a1, a1, 2
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    srli a2, a1, 1
-; RV32ZB-NEXT:    and a2, a2, a5
-; RV32ZB-NEXT:    and a1, a1, a5
-; RV32ZB-NEXT:    slli a1, a1, 1
-; RV32ZB-NEXT:    or a1, a2, a1
-; RV32ZB-NEXT:    ret
+; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a2, a0, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    ret
 ;
-; RV64ZB-LABEL: test_bitreverse_bswap_i64:
-; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI12_0)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
-; RV64ZB-NEXT:    srli a2, a0, 4
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI12_1)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 4
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 2
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    lui a1, %hi(.LCPI12_2)
-; RV64ZB-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
-; RV64ZB-NEXT:    slli a0, a0, 2
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    srli a2, a0, 1
-; RV64ZB-NEXT:    and a2, a2, a1
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    slli a0, a0, 1
-; RV64ZB-NEXT:    or a0, a2, a0
-; RV64ZB-NEXT:    ret
+; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV32ZBKB-LABEL: test_bitreverse_bswap_i64:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    brev8 a1, a1
+; RV32ZBKB-NEXT:    ret
+;
+; RV64ZBKB-LABEL: test_bitreverse_bswap_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
   %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
   %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
   ret i64 %tmp2


        

