[llvm-branch-commits] [llvm] ffe85d6 - [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbp asm instructions

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jul 27 04:09:08 PDT 2020


Author: lewis-revill
Date: 2020-07-27T13:07:37+02:00
New Revision: ffe85d6c03b91cf9294c7ec1d8192d4cc337cdfd

URL: https://github.com/llvm/llvm-project/commit/ffe85d6c03b91cf9294c7ec1d8192d4cc337cdfd
DIFF: https://github.com/llvm/llvm-project/commit/ffe85d6c03b91cf9294c7ec1d8192d4cc337cdfd.diff

LOG: [RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbp asm instructions

This patch provides optimization of bit manipulation operations when the
+experimental-b target feature is enabled.
It adds matching of single-block patterns of instructions to specific
bit-manipulation instructions from the permutation subset (Zbp
subextension) of the experimental B extension of RISC-V.
It also adds the corresponding codegen tests.

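For illustration, a minimal IR sketch of the kind of single-block pattern
matched here: with the Zbp patterns in place, the shift/mask/or idiom below
is selected as a single GREVI (rev.p) instruction instead of the longer
shift-and-mask sequence the base ISA needs. The function name is
hypothetical; the constants are the usual stage-1 GREV masks.

  ; llc -mtriple=riscv32 -mattr=+experimental-zbp
  define i32 @grev1_example(i32 %a) nounwind {
    %l  = shl i32 %a, 1
    %lm = and i32 %l, -1431655766   ; 0xAAAAAAAA
    %r  = lshr i32 %a, 1
    %rm = and i32 %r, 1431655765    ; 0x55555555
    %or = or i32 %lm, %rm           ; selected as: rev.p a0, a0
    ret i32 %or
  }
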
This patch is based on Claire Wolf's proposal for the bit manipulation
extension of RISC-V:
https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf

Differential Revision: https://reviews.llvm.org/D79871

(cherry picked from commit 31b52b4345e36b169a2b6a89eac44651f59889dd)

Added: 
    llvm/test/CodeGen/RISCV/rv32Zbp.ll
    llvm/test/CodeGen/RISCV/rv64Zbp.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index fb44f826eb6c..c89bb21c9701 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -151,7 +151,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::ROTL, XLenVT, Expand);
   setOperationAction(ISD::ROTR, XLenVT, Expand);
-  setOperationAction(ISD::BSWAP, XLenVT, Expand);
+
+  if (!Subtarget.hasStdExtZbp())
+    setOperationAction(ISD::BSWAP, XLenVT, Expand);
 
   if (!Subtarget.hasStdExtZbb()) {
     setOperationAction(ISD::CTTZ, XLenVT, Expand);
@@ -159,6 +161,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTPOP, XLenVT, Expand);
   }
 
+  if (Subtarget.hasStdExtZbp())
+    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
+
   ISD::CondCode FPCCToExtend[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index dc3d6cbb4fe8..09d5f1ef856a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -651,6 +651,97 @@ def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
 def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbp, IsRV32] in {
+def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1),
+              (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))),
+          (GORCI GPR:$rs1, (i32 1))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1),
+              (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))),
+          (GORCI GPR:$rs1, (i32 2))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1),
+              (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))),
+          (GORCI GPR:$rs1, (i32 4))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1),
+              (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))),
+          (GORCI GPR:$rs1, (i32 8))>;
+def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1),
+              (shl GPR:$rs1, (i32 16))),
+          (GORCI GPR:$rs1, (i32 16))>;
+} // Predicates = [HasStdExtZbp, IsRV32]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)),
+                   GPR:$rs1),
+              (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))),
+          (GORCI GPR:$rs1, (i64 1))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)),
+                   GPR:$rs1),
+              (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))),
+          (GORCI GPR:$rs1, (i64 2))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F)),
+                   GPR:$rs1),
+              (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))),
+          (GORCI GPR:$rs1, (i64 4))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)),
+                   GPR:$rs1),
+              (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))),
+          (GORCI GPR:$rs1, (i64 8))>;
+def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)),
+                   GPR:$rs1),
+              (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))),
+          (GORCI GPR:$rs1, (i64 16))>;
+def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1),
+              (shl GPR:$rs1, (i64 32))),
+          (GORCI GPR:$rs1, (i64 32))>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV32] in {
+def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)),
+              (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))),
+          (GREVI GPR:$rs1, (i32 1))>;
+def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)),
+              (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))),
+          (GREVI GPR:$rs1, (i32 2))>;
+def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)),
+              (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))),
+          (GREVI GPR:$rs1, (i32 4))>;
+def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)),
+              (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))),
+          (GREVI GPR:$rs1, (i32 8))>;
+def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
+def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))),
+          (GREVI GPR:$rs1, (i32 16))>;
+def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
+def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
+def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
+} // Predicates = [HasStdExtZbp, IsRV32]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)),
+              (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))),
+          (GREVI GPR:$rs1, (i64 1))>;
+def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)),
+              (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333))),
+          (GREVI GPR:$rs1, (i64 2))>;
+def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)),
+              (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))),
+          (GREVI GPR:$rs1, (i64 4))>;
+def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)),
+              (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))),
+          (GREVI GPR:$rs1, (i64 8))>;
+def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)),
+              (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))),
+          (GREVI GPR:$rs1, (i64 16))>;
+def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))),
+          (GREVI GPR:$rs1, (i64 32))>;
+def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
+def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
+def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbb] in {
 def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
 def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
 def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
@@ -681,6 +772,48 @@ def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
           (MAXU  GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbb]
 
+let Predicates = [HasStdExtZbp, IsRV32] in {
+def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
+                  (and GPR:$rs1, (i32 0xFF0000FF))),
+              (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))),
+          (SHFLI GPR:$rs1, (i32 8))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)),
+                  (and GPR:$rs1, (i32 0xF00FF00F))),
+              (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))),
+          (SHFLI GPR:$rs1, (i32 4))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)),
+                  (and GPR:$rs1, (i32 0xC3C3C3C3))),
+              (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))),
+          (SHFLI GPR:$rs1, (i32 2))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)),
+                  (and GPR:$rs1, (i32 0x99999999))),
+              (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))),
+          (SHFLI GPR:$rs1, (i32 1))>;
+} // Predicates = [HasStdExtZbp, IsRV32]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)),
+                  (and GPR:$rs1, (i64 0xFFFF00000000FFFF))),
+              (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))),
+          (SHFLI GPR:$rs1, (i64 16))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)),
+                  (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))),
+              (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))),
+          (SHFLI GPR:$rs1, (i64 8))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)),
+                  (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))),
+              (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))),
+          (SHFLI GPR:$rs1, (i64 4))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)),
+                  (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))),
+              (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))),
+          (SHFLI GPR:$rs1, (i64 2))>;
+def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
+                  (and GPR:$rs1, (i64 0x9999999999999999))),
+              (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))),
+          (SHFLI GPR:$rs1, (i64 1))>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
 let Predicates = [HasStdExtZbb, IsRV64] in {
 def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
           (ADDIWU GPR:$rs, simm12:$simm12)>;
@@ -702,6 +835,63 @@ def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
 def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
           (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)),
+                              GPR:$rs1),
+                          (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA))),
+                      i32),
+          (GORCIW GPR:$rs1, (i64 1))>;
+def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333)),
+                              GPR:$rs1),
+                          (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC))),
+                      i32),
+          (GORCIW GPR:$rs1, (i64 2))>;
+def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F)),
+                              GPR:$rs1),
+                          (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0))),
+                      i32),
+          (GORCIW GPR:$rs1, (i64 4))>;
+def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF)),
+                              GPR:$rs1),
+                          (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00))),
+                      i32),
+          (GORCIW GPR:$rs1, (i64 8))>;
+def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF)),
+                              GPR:$rs1),
+                          (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000))),
+                      i32),
+          (GORCIW GPR:$rs1, (i64 16))>;
+def : Pat<(sext_inreg (or (or (srl (and GPR:$rs1, (i64 0xFFFF0000)), (i64 16)),
+                              GPR:$rs1),
+                          (shl GPR:$rs1, (i64 16))), i32),
+          (GORCIW GPR:$rs1, (i64 16))>;
+
+def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA)),
+                          (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555))),
+                      i32),
+          (GREVIW GPR:$rs1, (i64 1))>;
+def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC)),
+                          (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333))),
+                      i32),
+          (GREVIW GPR:$rs1, (i64 2))>;
+def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0)),
+                          (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F))),
+                      i32),
+          (GREVIW GPR:$rs1, (i64 4))>;
+def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00)),
+                          (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF))),
+                      i32),
+          (GREVIW GPR:$rs1, (i64 8))>;
+def : Pat<(sext_inreg (or (shl GPR:$rs1, (i64 16)),
+                          (srl (and GPR:$rs1, 0xFFFF0000), (i64 16))), i32),
+          (GREVIW GPR:$rs1, (i64 16))>;
+def : Pat<(sra (bswap GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 24))>;
+def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
 def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
           (CLZW GPR:$rs1)>;
 // We don't pattern-match CTZW here as it has the same pattern and result as

diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
new file mode 100644
index 000000000000..8769ce77337c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -0,0 +1,1245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IBP
+
+define i32 @gorc1_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc1_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    lui a2, 699051
+; RV32I-NEXT:    addi a2, a2, -1366
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a3, 349525
+; RV32I-NEXT:    addi a3, a3, 1365
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc1_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc.p a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc1_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc.p a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 1
+  %shl = and i32 %and, -1431655766
+  %and1 = lshr i32 %a, 1
+  %shr = and i32 %and1, 1431655765
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc1_i64(i64 %a) nounwind {
+; RV32I-LABEL: gorc1_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a3, a1, 1
+; RV32I-NEXT:    lui a4, 699051
+; RV32I-NEXT:    addi a4, a4, -1366
+; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a4, a1, 1
+; RV32I-NEXT:    srli a5, a0, 1
+; RV32I-NEXT:    lui a3, 349525
+; RV32I-NEXT:    addi a3, a3, 1365
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a5, a0
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc1_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc.p a0, a0
+; RV32IB-NEXT:    orc.p a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc1_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc.p a0, a0
+; RV32IBP-NEXT:    orc.p a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 1
+  %shl = and i64 %and, -6148914691236517206
+  %and1 = lshr i64 %a, 1
+  %shr = and i64 %and1, 6148914691236517205
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i32 @gorc2_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc2_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    lui a2, 838861
+; RV32I-NEXT:    addi a2, a2, -820
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a3, 209715
+; RV32I-NEXT:    addi a3, a3, 819
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc2_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc2.n a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc2_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc2.n a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 2
+  %shl = and i32 %and, -858993460
+  %and1 = lshr i32 %a, 2
+  %shr = and i32 %and1, 858993459
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc2_i64(i64 %a) nounwind {
+; RV32I-LABEL: gorc2_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a3, a1, 2
+; RV32I-NEXT:    lui a4, 838861
+; RV32I-NEXT:    addi a4, a4, -820
+; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a4, a1, 2
+; RV32I-NEXT:    srli a5, a0, 2
+; RV32I-NEXT:    lui a3, 209715
+; RV32I-NEXT:    addi a3, a3, 819
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a5, a0
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc2_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc2.n a0, a0
+; RV32IB-NEXT:    orc2.n a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc2_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc2.n a0, a0
+; RV32IBP-NEXT:    orc2.n a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 2
+  %shl = and i64 %and, -3689348814741910324
+  %and1 = lshr i64 %a, 2
+  %shr = and i64 %and1, 3689348814741910323
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i32 @gorc4_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc4_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 4
+; RV32I-NEXT:    lui a2, 986895
+; RV32I-NEXT:    addi a2, a2, 240
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc4_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc4.b a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc4_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc4.b a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 4
+  %shl = and i32 %and, -252645136
+  %and1 = lshr i32 %a, 4
+  %shr = and i32 %and1, 252645135
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc4_i64(i64 %a) nounwind {
+; RV32I-LABEL: gorc4_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 4
+; RV32I-NEXT:    slli a3, a1, 4
+; RV32I-NEXT:    lui a4, 986895
+; RV32I-NEXT:    addi a4, a4, 240
+; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a4, a1, 4
+; RV32I-NEXT:    srli a5, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a5, a0
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc4_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc4.b a0, a0
+; RV32IB-NEXT:    orc4.b a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc4_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc4.b a0, a0
+; RV32IBP-NEXT:    orc4.b a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 4
+  %shl = and i64 %and, -1085102592571150096
+  %and1 = lshr i64 %a, 4
+  %shr = and i64 %and1, 1085102592571150095
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i32 @gorc8_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc8_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    lui a2, 1044496
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    addi a3, a3, 255
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc8_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc8.h a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc8_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc8.h a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 8
+  %shl = and i32 %and, -16711936
+  %and1 = lshr i32 %a, 8
+  %shr = and i32 %and1, 16711935
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc8_i64(i64 %a) nounwind {
+; RV32I-LABEL: gorc8_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    slli a3, a1, 8
+; RV32I-NEXT:    lui a4, 1044496
+; RV32I-NEXT:    addi a4, a4, -256
+; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a4, a1, 8
+; RV32I-NEXT:    srli a5, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    addi a3, a3, 255
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    or a0, a5, a0
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc8_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc8.h a0, a0
+; RV32IB-NEXT:    orc8.h a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc8_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc8.h a0, a0
+; RV32IBP-NEXT:    orc8.h a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 8
+  %shl = and i64 %and, -71777214294589696
+  %and1 = lshr i64 %a, 8
+  %shr = and i64 %and1, 71777214294589695
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i32 @gorc16_i32(i32 %a) nounwind {
+; RV32I-LABEL: gorc16_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srli a2, a0, 16
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc16_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc16 a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc16_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc16 a0, a0
+; RV32IBP-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = lshr i32 %a, 16
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc16_i64(i64 %a) nounwind {
+; RV32I-LABEL: gorc16_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a1, 16
+; RV32I-NEXT:    slli a3, a0, 16
+; RV32I-NEXT:    srli a4, a0, 16
+; RV32I-NEXT:    srli a5, a1, 16
+; RV32I-NEXT:    or a1, a5, a1
+; RV32I-NEXT:    or a0, a4, a0
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: gorc16_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    orc16 a0, a0
+; RV32IB-NEXT:    orc16 a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: gorc16_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    orc16 a0, a0
+; RV32IBP-NEXT:    orc16 a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 16
+  %shl = and i64 %and, -281470681808896
+  %and1 = lshr i64 %a, 16
+  %shr = and i64 %and1, 281470681808895
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i32 @grev1_i32(i32 %a) nounwind {
+; RV32I-LABEL: grev1_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    lui a2, 699051
+; RV32I-NEXT:    addi a2, a2, -1366
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev1_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev.p a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev1_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev.p a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 1
+  %shl = and i32 %and, -1431655766
+  %and1 = lshr i32 %a, 1
+  %shr = and i32 %and1, 1431655765
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev1_i64(i64 %a) nounwind {
+; RV32I-LABEL: grev1_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a3, a1, 1
+; RV32I-NEXT:    lui a4, 699051
+; RV32I-NEXT:    addi a4, a4, -1366
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    lui a4, 349525
+; RV32I-NEXT:    addi a4, a4, 1365
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev1_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev.p a0, a0
+; RV32IB-NEXT:    rev.p a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev1_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev.p a0, a0
+; RV32IBP-NEXT:    rev.p a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 1
+  %shl = and i64 %and, -6148914691236517206
+  %and1 = lshr i64 %a, 1
+  %shr = and i64 %and1, 6148914691236517205
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i32 @grev2_i32(i32 %a) nounwind {
+; RV32I-LABEL: grev2_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    lui a2, 838861
+; RV32I-NEXT:    addi a2, a2, -820
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev2_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev2.n a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev2_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev2.n a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 2
+  %shl = and i32 %and, -858993460
+  %and1 = lshr i32 %a, 2
+  %shr = and i32 %and1, 858993459
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev2_i64(i64 %a) nounwind {
+; RV32I-LABEL: grev2_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a3, a1, 2
+; RV32I-NEXT:    lui a4, 838861
+; RV32I-NEXT:    addi a4, a4, -820
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    srli a1, a1, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev2_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev2.n a0, a0
+; RV32IB-NEXT:    rev2.n a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev2_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev2.n a0, a0
+; RV32IBP-NEXT:    rev2.n a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 2
+  %shl = and i64 %and, -3689348814741910324
+  %and1 = lshr i64 %a, 2
+  %shr = and i64 %and1, 3689348814741910323
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i32 @grev4_i32(i32 %a) nounwind {
+; RV32I-LABEL: grev4_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 4
+; RV32I-NEXT:    lui a2, 986895
+; RV32I-NEXT:    addi a2, a2, 240
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev4_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev4.b a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev4_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev4.b a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 4
+  %shl = and i32 %and, -252645136
+  %and1 = lshr i32 %a, 4
+  %shr = and i32 %and1, 252645135
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev4_i64(i64 %a) nounwind {
+; RV32I-LABEL: grev4_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 4
+; RV32I-NEXT:    slli a3, a1, 4
+; RV32I-NEXT:    lui a4, 986895
+; RV32I-NEXT:    addi a4, a4, 240
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    srli a1, a1, 4
+; RV32I-NEXT:    lui a4, 61681
+; RV32I-NEXT:    addi a4, a4, -241
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev4_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev4.b a0, a0
+; RV32IB-NEXT:    rev4.b a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev4_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev4.b a0, a0
+; RV32IBP-NEXT:    rev4.b a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 4
+  %shl = and i64 %and, -1085102592571150096
+  %and1 = lshr i64 %a, 4
+  %shr = and i64 %and1, 1085102592571150095
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i32 @grev8_i32(i32 %a) nounwind {
+; RV32I-LABEL: grev8_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    lui a2, 1044496
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    lui a2, 4080
+; RV32I-NEXT:    addi a2, a2, 255
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev8_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev8.h a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev8_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev8.h a0, a0
+; RV32IBP-NEXT:    ret
+  %and = shl i32 %a, 8
+  %shl = and i32 %and, -16711936
+  %and1 = lshr i32 %a, 8
+  %shr = and i32 %and1, 16711935
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev8_i64(i64 %a) nounwind {
+; RV32I-LABEL: grev8_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    slli a3, a1, 8
+; RV32I-NEXT:    lui a4, 1044496
+; RV32I-NEXT:    addi a4, a4, -256
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    srli a1, a1, 8
+; RV32I-NEXT:    lui a4, 4080
+; RV32I-NEXT:    addi a4, a4, 255
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev8_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev8.h a0, a0
+; RV32IB-NEXT:    rev8.h a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev8_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev8.h a0, a0
+; RV32IBP-NEXT:    rev8.h a1, a1
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 8
+  %shl = and i64 %and, -71777214294589696
+  %and1 = lshr i64 %a, 8
+  %shr = and i64 %and1, 71777214294589695
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i32 @grev16_i32(i32 %a) nounwind {
+; RV32I-LABEL: grev16_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev16_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rori a0, a0, 16
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev16_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rori a0, a0, 16
+; RV32IBP-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = lshr i32 %a, 16
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev16_i64(i64 %a) nounwind {
+; RV32I-LABEL: grev16_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a1, 16
+; RV32I-NEXT:    srli a3, a0, 16
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    srli a1, a1, 16
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: grev16_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rori a0, a0, 16
+; RV32IB-NEXT:    rori a1, a1, 16
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: grev16_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rori a0, a0, 16
+; RV32IBP-NEXT:    rori a1, a1, 16
+; RV32IBP-NEXT:    ret
+  %and = shl i64 %a, 16
+  %shl = and i64 %and, -281470681808896
+  %and1 = lshr i64 %a, 16
+  %shr = and i64 %and1, 281470681808895
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define i32 @bswap_i32(i32 %a) nounwind {
+; RV32I-LABEL: bswap_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: bswap_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev8 a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: bswap_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev8 a0, a0
+; RV32IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV32I-LABEL: bswap_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    lui a5, 4080
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    or a2, a1, a2
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a3, a0, 8
+; RV32I-NEXT:    and a3, a3, a5
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: bswap_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev8 a2, a1
+; RV32IB-NEXT:    rev8 a1, a0
+; RV32IB-NEXT:    mv a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: bswap_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev8 a2, a1
+; RV32IBP-NEXT:    rev8 a1, a0
+; RV32IBP-NEXT:    mv a0, a2
+; RV32IBP-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
+
+declare i32 @llvm.bitreverse.i32(i32)
+
+define i32 @bitreverse_i32(i32 %a) nounwind {
+; RV32I-LABEL: bitreverse_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    lui a2, 986895
+; RV32I-NEXT:    addi a2, a2, 240
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    lui a2, 838861
+; RV32I-NEXT:    addi a2, a2, -820
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    lui a1, 349525
+; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    lui a2, 699051
+; RV32I-NEXT:    addi a2, a2, -1366
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: bitreverse_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: bitreverse_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev a0, a0
+; RV32IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i64 @bitreverse_i64(i64 %a) nounwind {
+; RV32I-LABEL: bitreverse_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi t0, a3, -256
+; RV32I-NEXT:    and a2, a2, t0
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    lui a6, 4080
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi t1, a2, -241
+; RV32I-NEXT:    and a2, a1, t1
+; RV32I-NEXT:    slli a2, a2, 4
+; RV32I-NEXT:    lui a5, 986895
+; RV32I-NEXT:    addi t2, a5, 240
+; RV32I-NEXT:    and a1, a1, t2
+; RV32I-NEXT:    srli a1, a1, 4
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi t3, a2, 819
+; RV32I-NEXT:    and a3, a1, t3
+; RV32I-NEXT:    slli a3, a3, 2
+; RV32I-NEXT:    lui a4, 838861
+; RV32I-NEXT:    addi a4, a4, -820
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    srli a1, a1, 2
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    lui a3, 349525
+; RV32I-NEXT:    addi a3, a3, 1365
+; RV32I-NEXT:    and a5, a1, a3
+; RV32I-NEXT:    slli a5, a5, 1
+; RV32I-NEXT:    lui a2, 699051
+; RV32I-NEXT:    addi a2, a2, -1366
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    or a7, a1, a5
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, t0
+; RV32I-NEXT:    srli a5, a0, 24
+; RV32I-NEXT:    or a1, a1, a5
+; RV32I-NEXT:    slli a5, a0, 8
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a5
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a1, a0, t1
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    and a0, a0, t2
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a1, a0, t3
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a1, a0, a3
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a7
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: bitreverse_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    rev a2, a1
+; RV32IB-NEXT:    rev a1, a0
+; RV32IB-NEXT:    mv a0, a2
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: bitreverse_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    rev a2, a1
+; RV32IBP-NEXT:    rev a1, a0
+; RV32IBP-NEXT:    mv a0, a2
+; RV32IBP-NEXT:    ret
+  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %1
+}
+
+define i32 @shfl1_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: shfl1_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 629146
+; RV32I-NEXT:    addi a1, a1, -1639
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    lui a3, 279620
+; RV32I-NEXT:    addi a3, a3, 1092
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    lui a2, 139810
+; RV32I-NEXT:    addi a2, a2, 546
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl1_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip.n a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl1_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip.n a0, a0
+; RV32IBP-NEXT:    ret
+  %and = and i32 %a, -1717986919
+  %shl = shl i32 %a, 1
+  %and1 = and i32 %shl, 1145324612
+  %or = or i32 %and1, %and
+  %shr = lshr i32 %a, 1
+  %and2 = and i32 %shr, 572662306
+  %or3 = or i32 %or, %and2
+  ret i32 %or3
+}
+
+define i64 @shfl1_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: shfl1_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 629146
+; RV32I-NEXT:    addi a2, a2, -1639
+; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a2, a1, a2
+; RV32I-NEXT:    slli a4, a1, 1
+; RV32I-NEXT:    slli a5, a0, 1
+; RV32I-NEXT:    lui a3, 279620
+; RV32I-NEXT:    addi a3, a3, 1092
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    or a3, a5, a6
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    lui a4, 139810
+; RV32I-NEXT:    addi a4, a4, 546
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl1_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip.n a0, a0
+; RV32IB-NEXT:    zip.n a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl1_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip.n a0, a0
+; RV32IBP-NEXT:    zip.n a1, a1
+; RV32IBP-NEXT:    ret
+  %and = and i64 %a, -7378697629483820647
+  %shl = shl i64 %a, 1
+  %and1 = and i64 %shl, 4919131752989213764
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 1
+  %and2 = and i64 %shr, 2459565876494606882
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i32 @shfl2_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: shfl2_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 801852
+; RV32I-NEXT:    addi a1, a1, 963
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    lui a3, 197379
+; RV32I-NEXT:    addi a3, a3, 48
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    lui a2, 49345
+; RV32I-NEXT:    addi a2, a2, -1012
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl2_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip2.b a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl2_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip2.b a0, a0
+; RV32IBP-NEXT:    ret
+  %and = and i32 %a, -1010580541
+  %shl = shl i32 %a, 2
+  %and1 = and i32 %shl, 808464432
+  %or = or i32 %and1, %and
+  %shr = lshr i32 %a, 2
+  %and2 = and i32 %shr, 202116108
+  %or3 = or i32 %or, %and2
+  ret i32 %or3
+}
+
+define i64 @shfl2_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: shfl2_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 801852
+; RV32I-NEXT:    addi a2, a2, 963
+; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a2, a1, a2
+; RV32I-NEXT:    slli a4, a1, 2
+; RV32I-NEXT:    slli a5, a0, 2
+; RV32I-NEXT:    lui a3, 197379
+; RV32I-NEXT:    addi a3, a3, 48
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    or a3, a5, a6
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    srli a1, a1, 2
+; RV32I-NEXT:    lui a4, 49345
+; RV32I-NEXT:    addi a4, a4, -1012
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl2_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip2.b a0, a0
+; RV32IB-NEXT:    zip2.b a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl2_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip2.b a0, a0
+; RV32IBP-NEXT:    zip2.b a1, a1
+; RV32IBP-NEXT:    ret
+  %and = and i64 %a, -4340410370284600381
+  %shl = shl i64 %a, 2
+  %and1 = and i64 %shl, 3472328296227680304
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 2
+  %and2 = and i64 %shr, 868082074056920076
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i32 @shfl4_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: shfl4_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 983295
+; RV32I-NEXT:    addi a1, a1, 15
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61441
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    lui a2, 3840
+; RV32I-NEXT:    addi a2, a2, 240
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl4_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip4.h a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl4_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip4.h a0, a0
+; RV32IBP-NEXT:    ret
+  %and = and i32 %a, -267390961
+  %shl = shl i32 %a, 4
+  %and1 = and i32 %shl, 251662080
+  %or = or i32 %and1, %and
+  %shr = lshr i32 %a, 4
+  %and2 = and i32 %shr, 15728880
+  %or3 = or i32 %or, %and2
+  ret i32 %or3
+}
+
+define i64 @shfl4_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: shfl4_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 983295
+; RV32I-NEXT:    addi a2, a2, 15
+; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a2, a1, a2
+; RV32I-NEXT:    slli a4, a1, 4
+; RV32I-NEXT:    slli a5, a0, 4
+; RV32I-NEXT:    lui a3, 61441
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    or a3, a5, a6
+; RV32I-NEXT:    srli a0, a0, 4
+; RV32I-NEXT:    srli a1, a1, 4
+; RV32I-NEXT:    lui a4, 3840
+; RV32I-NEXT:    addi a4, a4, 240
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl4_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip4.h a0, a0
+; RV32IB-NEXT:    zip4.h a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl4_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip4.h a0, a0
+; RV32IBP-NEXT:    zip4.h a1, a1
+; RV32IBP-NEXT:    ret
+  %and = and i64 %a, -1148435428713435121
+  %shl = shl i64 %a, 4
+  %and1 = and i64 %shl, 1080880403494997760
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 4
+  %and2 = and i64 %shr, 67555025218437360
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i32 @shfl8_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: shfl8_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 1044480
+; RV32I-NEXT:    addi a1, a1, 255
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl8_i32:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip8 a0, a0
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl8_i32:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip8 a0, a0
+; RV32IBP-NEXT:    ret
+  %and = and i32 %a, -16776961
+  %shl = shl i32 %a, 8
+  %and1 = and i32 %shl, 16711680
+  %or = or i32 %and1, %and
+  %shr = lshr i32 %a, 8
+  %and2 = and i32 %shr, 65280
+  %or3 = or i32 %or, %and2
+  ret i32 %or3
+}
+
+define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: shfl8_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 1044480
+; RV32I-NEXT:    addi a2, a2, 255
+; RV32I-NEXT:    and a3, a0, a2
+; RV32I-NEXT:    and a2, a1, a2
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    slli a5, a0, 8
+; RV32I-NEXT:    lui a6, 4080
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a2, a4, a2
+; RV32I-NEXT:    or a3, a5, a3
+; RV32I-NEXT:    srli a0, a0, 8
+; RV32I-NEXT:    srli a1, a1, 8
+; RV32I-NEXT:    lui a4, 16
+; RV32I-NEXT:    addi a4, a4, -256
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32IB-LABEL: shfl8_i64:
+; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    zip8 a0, a0
+; RV32IB-NEXT:    zip8 a1, a1
+; RV32IB-NEXT:    ret
+;
+; RV32IBP-LABEL: shfl8_i64:
+; RV32IBP:       # %bb.0:
+; RV32IBP-NEXT:    zip8 a0, a0
+; RV32IBP-NEXT:    zip8 a1, a1
+; RV32IBP-NEXT:    ret
+  %and = and i64 %a, -72056494543077121
+  %shl = shl i64 %a, 8
+  %and1 = and i64 %shl, 71776119077928960
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 8
+  %and2 = and i64 %shr, 280375465148160
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}

diff  --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
new file mode 100644
index 000000000000..ae467efaab83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -0,0 +1,1343 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IBP
+
+define signext i32 @gorc1_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: gorc1_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    lui a2, 171
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    lui a3, 349525
+; RV64I-NEXT:    addiw a3, a3, 1365
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc1_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc1_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 1
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 1
+  %shl = and i32 %and, -1431655766
+  %and1 = lshr i32 %a, 1
+  %shr = and i32 %and1, 1431655765
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc1_i64(i64 %a) nounwind {
+; RV64I-LABEL: gorc1_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    lui a2, 1026731
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    lui a3, 21845
+; RV64I-NEXT:    addiw a3, a3, 1365
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 1365
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 1365
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 1365
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc1_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc.p a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc1_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc.p a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 1
+  %shl = and i64 %and, -6148914691236517206
+  %and1 = lshr i64 %a, 1
+  %shr = and i64 %and1, 6148914691236517205
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define signext i32 @gorc2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: gorc2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    lui a2, 205
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    lui a3, 209715
+; RV64I-NEXT:    addiw a3, a3, 819
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc2_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 2
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc2_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 2
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 2
+  %shl = and i32 %and, -858993460
+  %and1 = lshr i32 %a, 2
+  %shr = and i32 %and1, 858993459
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc2_i64(i64 %a) nounwind {
+; RV64I-LABEL: gorc2_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    lui a2, 1035469
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    lui a3, 13107
+; RV64I-NEXT:    addiw a3, a3, 819
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 819
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 819
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 819
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc2_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc2.n a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc2_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc2.n a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 2
+  %shl = and i64 %and, -3689348814741910324
+  %and1 = lshr i64 %a, 2
+  %shr = and i64 %and1, 3689348814741910323
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define signext i32 @gorc4_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: gorc4_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 4
+; RV64I-NEXT:    lui a2, 241
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    lui a3, 61681
+; RV64I-NEXT:    addiw a3, a3, -241
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc4_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 4
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc4_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 4
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 4
+  %shl = and i32 %and, -252645136
+  %and1 = lshr i32 %a, 4
+  %shr = and i32 %and1, 252645135
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc4_i64(i64 %a) nounwind {
+; RV64I-LABEL: gorc4_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 4
+; RV64I-NEXT:    lui a2, 1044721
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    lui a3, 3855
+; RV64I-NEXT:    addiw a3, a3, 241
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, -241
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 241
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, -241
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc4_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc4.b a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc4_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc4.b a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 4
+  %shl = and i64 %and, -1085102592571150096
+  %and1 = lshr i64 %a, 4
+  %shr = and i64 %and1, 1085102592571150095
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define signext i32 @gorc8_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: gorc8_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    addiw a3, a3, 255
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc8_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 8
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc8_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 8
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 8
+  %shl = and i32 %and, -16711936
+  %and1 = lshr i32 %a, 8
+  %shr = and i32 %and1, 16711935
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc8_i64(i64 %a) nounwind {
+; RV64I-LABEL: gorc8_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    lui a2, 1044496
+; RV64I-NEXT:    addiw a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    addiw a3, a3, 255
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, 255
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, 255
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc8_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc8.h a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc8_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc8.h a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 8
+  %shl = and i64 %and, -71777214294589696
+  %and1 = lshr i64 %a, 8
+  %shr = and i64 %and1, 71777214294589695
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define signext i32 @gorc16_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: gorc16_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    srliw a2, a0, 16
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc16_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    gorciw a0, a0, 16
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc16_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    gorciw a0, a0, 16
+; RV64IBP-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = lshr i32 %a, 16
+  %or = or i32 %shr, %a
+  %or2 = or i32 %or, %shl
+  ret i32 %or2
+}
+
+define i64 @gorc16_i64(i64 %a) nounwind {
+; RV64I-LABEL: gorc16_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    addiw a2, a2, 1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 16
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addiw a3, a3, -1
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, 1
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, -1
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc16_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc16.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc16_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc16.w a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 16
+  %shl = and i64 %and, -281470681808896
+  %and1 = lshr i64 %a, 16
+  %shr = and i64 %and1, 281470681808895
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define i64 @gorc32(i64 %a) nounwind {
+; RV64I-LABEL: gorc32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    srli a2, a0, 32
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: gorc32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    orc32 a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: gorc32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    orc32 a0, a0
+; RV64IBP-NEXT:    ret
+  %shl = shl i64 %a, 32
+  %shr = lshr i64 %a, 32
+  %or = or i64 %shr, %a
+  %or2 = or i64 %or, %shl
+  ret i64 %or2
+}
+
+define signext i32 @grev1_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: grev1_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    lui a2, 171
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev1_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 1
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev1_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 1
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 1
+  %shl = and i32 %and, -1431655766
+  %and1 = lshr i32 %a, 1
+  %shr = and i32 %and1, 1431655765
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev1_i64(i64 %a) nounwind {
+; RV64I-LABEL: grev1_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    lui a2, 1026731
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    lui a2, 21845
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 1365
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev1_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev.p a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev1_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev.p a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 1
+  %shl = and i64 %and, -6148914691236517206
+  %and1 = lshr i64 %a, 1
+  %shr = and i64 %and1, 6148914691236517205
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define signext i32 @grev2_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: grev2_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    lui a2, 205
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev2_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 2
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev2_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 2
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 2
+  %shl = and i32 %and, -858993460
+  %and1 = lshr i32 %a, 2
+  %shr = and i32 %and1, 858993459
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev2_i64(i64 %a) nounwind {
+; RV64I-LABEL: grev2_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    lui a2, 1035469
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    lui a2, 13107
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 819
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev2_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev2.n a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev2_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev2.n a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 2
+  %shl = and i64 %and, -3689348814741910324
+  %and1 = lshr i64 %a, 2
+  %shr = and i64 %and1, 3689348814741910323
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define signext i32 @grev4_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: grev4_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 4
+; RV64I-NEXT:    lui a2, 241
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 4
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev4_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 4
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev4_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 4
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 4
+  %shl = and i32 %and, -252645136
+  %and1 = lshr i32 %a, 4
+  %shr = and i32 %and1, 252645135
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev4_i64(i64 %a) nounwind {
+; RV64I-LABEL: grev4_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 4
+; RV64I-NEXT:    lui a2, 1044721
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 4
+; RV64I-NEXT:    lui a2, 3855
+; RV64I-NEXT:    addiw a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev4_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev4.b a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev4_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev4.b a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 4
+  %shl = and i64 %and, -1085102592571150096
+  %and1 = lshr i64 %a, 4
+  %shr = and i64 %and1, 1085102592571150095
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define signext i32 @grev8_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: grev8_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    addiw a2, a2, 255
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev8_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 8
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev8_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 8
+; RV64IBP-NEXT:    ret
+  %and = shl i32 %a, 8
+  %shl = and i32 %and, -16711936
+  %and1 = lshr i32 %a, 8
+  %shr = and i32 %and1, 16711935
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev8_i64(i64 %a) nounwind {
+; RV64I-LABEL: grev8_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    lui a2, 1044496
+; RV64I-NEXT:    addiw a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    addiw a2, a2, 255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, 255
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, 255
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev8_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev8.h a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev8_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev8.h a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 8
+  %shl = and i64 %and, -71777214294589696
+  %and1 = lshr i64 %a, 8
+  %shr = and i64 %and1, 71777214294589695
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define signext i32 @grev16_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: grev16_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    srliw a0, a0, 16
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev16_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 16
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev16_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 16
+; RV64IBP-NEXT:    ret
+  %shl = shl i32 %a, 16
+  %shr = lshr i32 %a, 16
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @grev16_i64(i64 %a) nounwind {
+; RV64I-LABEL: grev16_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    lui a2, 1048560
+; RV64I-NEXT:    addiw a2, a2, 1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a0, a0, 16
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, 1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev16_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev16.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev16_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev16.w a0, a0
+; RV64IBP-NEXT:    ret
+  %and = shl i64 %a, 16
+  %shl = and i64 %and, -281470681808896
+  %and1 = lshr i64 %a, 16
+  %shr = and i64 %and1, 281470681808895
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i64 @grev32(i64 %a) nounwind {
+; RV64I-LABEL: grev32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: grev32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rori a0, a0, 32
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: grev32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rori a0, a0, 32
+; RV64IBP-NEXT:    ret
+  %shl = shl i64 %a, 32
+  %shr = lshr i64 %a, 32
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define signext i32 @bswap_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    addi a2, zero, 255
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    slli a3, a0, 24
+; RV64I-NEXT:    slli a4, a2, 40
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    slli a3, a0, 40
+; RV64I-NEXT:    slli a2, a2, 48
+; RV64I-NEXT:    and a2, a3, a2
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: bswap_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 24
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: bswap_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 24
+; RV64IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @bswap_i64(i64 %a) {
+; RV64I-LABEL: bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    addi a3, zero, 255
+; RV64I-NEXT:    slli a4, a3, 24
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a2, a0, 40
+; RV64I-NEXT:    lui a4, 16
+; RV64I-NEXT:    addiw a4, a4, -256
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srli a4, a0, 56
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slli a4, a0, 24
+; RV64I-NEXT:    slli a5, a3, 40
+; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    slli a3, a3, 48
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: bswap_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev8 a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: bswap_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev8 a0, a0
+; RV64IBP-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
+
+declare i32 @llvm.bitreverse.i32(i32)
+
+define signext i32 @bitreverse_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: bitreverse_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    addi a3, zero, 255
+; RV64I-NEXT:    slli a4, a3, 24
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a2, a0, 40
+; RV64I-NEXT:    lui a4, 16
+; RV64I-NEXT:    addiw a4, a4, -256
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srli a4, a0, 56
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slli a4, a0, 24
+; RV64I-NEXT:    slli a5, a3, 40
+; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    slli a3, a3, 48
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 3855
+; RV64I-NEXT:    addiw a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 4
+; RV64I-NEXT:    lui a2, 1044721
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 13107
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    lui a2, 1035469
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    lui a2, 873813
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    slli a2, a2, 33
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 32
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: bitreverse_i32:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    greviw a0, a0, 31
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: bitreverse_i32:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    greviw a0, a0, 31
+; RV64IBP-NEXT:    ret
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i64 @bitreverse_i64(i64 %a) nounwind {
+; RV64I-LABEL: bitreverse_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    addi a3, zero, 255
+; RV64I-NEXT:    slli a4, a3, 24
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a2, a0, 40
+; RV64I-NEXT:    lui a4, 16
+; RV64I-NEXT:    addiw a4, a4, -256
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srli a4, a0, 56
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slli a4, a0, 24
+; RV64I-NEXT:    slli a5, a3, 40
+; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    slli a3, a3, 48
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 3855
+; RV64I-NEXT:    addiw a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 4
+; RV64I-NEXT:    lui a2, 1044721
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -241
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 4
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 13107
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 819
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    lui a2, 1035469
+; RV64I-NEXT:    addiw a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -819
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -820
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    lui a1, 21845
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 1365
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 1365
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 1365
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    lui a2, 1026731
+; RV64I-NEXT:    addiw a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1365
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1366
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: bitreverse_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    rev a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: bitreverse_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    rev a0, a0
+; RV64IBP-NEXT:    ret
+  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %1
+}
+
+; There's no [un]shfliw instruction because slliu.w occupies the encoding slot
+; that shfliw would use.
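+;
+; As a rough guide to the 64-bit tests below: each one writes a single shuffle
+; stage as a keep mask plus a shifted-left mask and a shifted-right mask. In
+; shfl1_i64, for instance, the three constants correspond to
+; 0x9999999999999999, 0x4444444444444444 and 0x2222222222222222, which keep
+; bits 0 and 3 of every nibble and swap bits 1 and 2; that is the operation a
+; single zip.n performs.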
+
+define i64 @shfl1_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: shfl1_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1035469
+; RV64I-NEXT:    addiw a1, a1, -819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -819
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -819
+; RV64I-NEXT:    slli a1, a1, 13
+; RV64I-NEXT:    addi a1, a1, -1639
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    lui a3, 4369
+; RV64I-NEXT:    addiw a3, a3, 273
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 273
+; RV64I-NEXT:    slli a3, a3, 12
+; RV64I-NEXT:    addi a3, a3, 273
+; RV64I-NEXT:    slli a4, a3, 14
+; RV64I-NEXT:    addi a4, a4, 1092
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    slli a2, a3, 13
+; RV64I-NEXT:    addi a2, a2, 546
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: shfl1_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    zip.n a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: shfl1_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    zip.n a0, a0
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, -7378697629483820647
+  %shl = shl i64 %a, 1
+  %and1 = and i64 %shl, 4919131752989213764
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 1
+  %and2 = and i64 %shr, 2459565876494606882
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i64 @shfl2_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: shfl2_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1044721
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 241
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 14
+; RV64I-NEXT:    addi a1, a1, 963
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a2, a0, 2
+; RV64I-NEXT:    lui a3, 48
+; RV64I-NEXT:    addiw a3, a3, 771
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, 771
+; RV64I-NEXT:    slli a4, a3, 16
+; RV64I-NEXT:    addi a4, a4, 771
+; RV64I-NEXT:    slli a4, a4, 12
+; RV64I-NEXT:    addi a4, a4, 48
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    slli a2, a3, 14
+; RV64I-NEXT:    addi a2, a2, 193
+; RV64I-NEXT:    slli a2, a2, 12
+; RV64I-NEXT:    addi a2, a2, -1012
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: shfl2_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    zip2.b a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: shfl2_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    zip2.b a0, a0
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, -4340410370284600381
+  %shl = shl i64 %a, 2
+  %and1 = and i64 %shl, 3472328296227680304
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 2
+  %and2 = and i64 %shr, 868082074056920076
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i64 @shfl4_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: shfl4_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048560
+; RV64I-NEXT:    addiw a1, a1, 255
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, 255
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, 255
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 15
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a2, a0, 4
+; RV64I-NEXT:    lui a3, 240
+; RV64I-NEXT:    addiw a3, a3, 15
+; RV64I-NEXT:    slli a3, a3, 16
+; RV64I-NEXT:    addi a3, a3, 15
+; RV64I-NEXT:    slli a4, a3, 12
+; RV64I-NEXT:    addi a4, a4, 1
+; RV64I-NEXT:    slli a4, a4, 12
+; RV64I-NEXT:    addi a4, a4, -256
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a0, a0, 4
+; RV64I-NEXT:    slli a2, a3, 20
+; RV64I-NEXT:    addi a2, a2, 240
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: shfl4_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    zip4.h a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: shfl4_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    zip4.h a0, a0
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, -1148435428713435121
+  %shl = shl i64 %a, 4
+  %and1 = and i64 %shl, 1080880403494997760
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 4
+  %and2 = and i64 %shr, 67555025218437360
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: shfl8_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1048560
+; RV64I-NEXT:    addiw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    slli a1, a1, 24
+; RV64I-NEXT:    addi a1, a1, 255
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    addi a3, zero, 255
+; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    addi a4, a4, 255
+; RV64I-NEXT:    slli a4, a4, 16
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a0, a0, 8
+; RV64I-NEXT:    slli a2, a3, 24
+; RV64I-NEXT:    addi a2, a2, 1
+; RV64I-NEXT:    slli a2, a2, 16
+; RV64I-NEXT:    addi a2, a2, -256
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: shfl8_i64:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    zip8.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: shfl8_i64:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    zip8.w a0, a0
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, -72056494543077121
+  %shl = shl i64 %a, 8
+  %and1 = and i64 %shl, 71776119077928960
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 8
+  %and2 = and i64 %shr, 280375465148160
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
+
+define i64 @shfl16(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: shfl16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a1, zero, -1
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    addi a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    addi a1, a1, -1
+; RV64I-NEXT:    and a1, a0, a1
+; RV64I-NEXT:    slli a2, a0, 16
+; RV64I-NEXT:    lui a3, 16
+; RV64I-NEXT:    addiw a3, a3, -1
+; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a0, a0, 16
+; RV64I-NEXT:    slli a2, a3, 16
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: shfl16:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    zip16 a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBP-LABEL: shfl16:
+; RV64IBP:       # %bb.0:
+; RV64IBP-NEXT:    zip16 a0, a0
+; RV64IBP-NEXT:    ret
+  %and = and i64 %a, -281474976645121
+  %shl = shl i64 %a, 16
+  %and1 = and i64 %shl, 281470681743360
+  %or = or i64 %and1, %and
+  %shr = lshr i64 %a, 16
+  %and2 = and i64 %shr, 4294901760
+  %or3 = or i64 %or, %and2
+  ret i64 %or3
+}
